diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..8083008f91699db4e7c03a31c965f22009d8533d
Binary files /dev/null and b/.DS_Store differ
diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..50e4cc3f5d2c557323725f7b175661ac14affb8f 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.lance filter=lfs diff=lfs merge=lfs -text
+*.idx filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..3b0d8f320349005754e2f9c1f63bbc5c89c21beb
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,110 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+venv/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
diff --git a/.lancedb/model1_fixed.lance/_indices/4a9890c3-c8d5-4f2e-b1ec-5678a46c625b/index.idx b/.lancedb/model1_fixed.lance/_indices/4a9890c3-c8d5-4f2e-b1ec-5678a46c625b/index.idx
new file mode 100644
index 0000000000000000000000000000000000000000..971748e54c1e41d3b7c241d821f0ac98d7437312
--- /dev/null
+++ b/.lancedb/model1_fixed.lance/_indices/4a9890c3-c8d5-4f2e-b1ec-5678a46c625b/index.idx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be17192e0f5a79a376bf775585a896413429c0c73bb9473530c1be6aba7b3f4c
+size 1013792
diff --git a/.lancedb/model1_fixed.lance/_transactions/0-84ab077d-9861-44d8-be21-3ed1140ecfe3.txn b/.lancedb/model1_fixed.lance/_transactions/0-84ab077d-9861-44d8-be21-3ed1140ecfe3.txn
new file mode 100644
index 0000000000000000000000000000000000000000..a57e9b9a670bf27e6f567e6f21bbbf3396691235
--- /dev/null
+++ b/.lancedb/model1_fixed.lance/_transactions/0-84ab077d-9861-44d8-be21-3ed1140ecfe3.txn
@@ -0,0 +1 @@
+$84ab077d-9861-44d8-be21-3ed1140ecfe3²U2vector ÿÿÿÿÿÿÿÿÿ*fixed_size_list:float:38408text ÿÿÿÿÿÿÿÿÿ*string08
\ No newline at end of file
diff --git a/.lancedb/model1_fixed.lance/_transactions/1-46b62a4f-9a21-4f27-b9e9-e9676a1c656d.txn b/.lancedb/model1_fixed.lance/_transactions/1-46b62a4f-9a21-4f27-b9e9-e9676a1c656d.txn
new file mode 100644
index
0000000000000000000000000000000000000000..9a434f74488b35d34d162688ef3ca79a48ec9a54 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/1-46b62a4f-9a21-4f27-b9e9-e9676a1c656d.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/10-bb361984-1c0c-48db-9782-42927a9cef20.txn b/.lancedb/model1_fixed.lance/_transactions/10-bb361984-1c0c-48db-9782-42927a9cef20.txn new file mode 100644 index 0000000000000000000000000000000000000000..b74d0b3bf20235525d0127e40d4a03afc1db2f61 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/10-bb361984-1c0c-48db-9782-42927a9cef20.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/100-5aea14c0-5f89-4308-b19e-d0d433dc4a14.txn b/.lancedb/model1_fixed.lance/_transactions/100-5aea14c0-5f89-4308-b19e-d0d433dc4a14.txn new file mode 100644 index 0000000000000000000000000000000000000000..1862274d436f6c90a372fc778083c6499152dc5a Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/100-5aea14c0-5f89-4308-b19e-d0d433dc4a14.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/101-a6d05c7a-f2ec-4924-aa0c-de90d30052e5.txn b/.lancedb/model1_fixed.lance/_transactions/101-a6d05c7a-f2ec-4924-aa0c-de90d30052e5.txn new file mode 100644 index 0000000000000000000000000000000000000000..e4e1a67f8295dbc35b58dd5b139e73e37d18e80e Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/101-a6d05c7a-f2ec-4924-aa0c-de90d30052e5.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/102-69b6e9de-9015-4e32-839b-ed344b6af4c0.txn b/.lancedb/model1_fixed.lance/_transactions/102-69b6e9de-9015-4e32-839b-ed344b6af4c0.txn new file mode 100644 index 0000000000000000000000000000000000000000..8ffc63498492c0e11e6bd0209b3b88e4fa83c85a Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/102-69b6e9de-9015-4e32-839b-ed344b6af4c0.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/103-35cc236d-59fd-4e65-afa4-63b65024aca3.txn b/.lancedb/model1_fixed.lance/_transactions/103-35cc236d-59fd-4e65-afa4-63b65024aca3.txn new file mode 100644 index 0000000000000000000000000000000000000000..9e67868815e8a1d29972c98bb7da4f85e835bd6c Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/103-35cc236d-59fd-4e65-afa4-63b65024aca3.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/104-a22a6170-fdbf-4ba7-9e9b-184150dedb89.txn b/.lancedb/model1_fixed.lance/_transactions/104-a22a6170-fdbf-4ba7-9e9b-184150dedb89.txn new file mode 100644 index 0000000000000000000000000000000000000000..516143d82a7ea9939abb360e05fd4323fda99f2d Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/104-a22a6170-fdbf-4ba7-9e9b-184150dedb89.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/105-6f968eef-eef6-445d-9b6e-a6c3ece109a5.txn b/.lancedb/model1_fixed.lance/_transactions/105-6f968eef-eef6-445d-9b6e-a6c3ece109a5.txn new file mode 100644 index 0000000000000000000000000000000000000000..68993a98b2917b1b8b61da6286ea030b3018007e Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/105-6f968eef-eef6-445d-9b6e-a6c3ece109a5.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/106-14417302-b8d2-4492-bcc2-604fecc9a83a.txn b/.lancedb/model1_fixed.lance/_transactions/106-14417302-b8d2-4492-bcc2-604fecc9a83a.txn new file mode 100644 index 0000000000000000000000000000000000000000..72579e10cc57afba92f4ca83100eae4a3f71636a Binary files /dev/null and 
b/.lancedb/model1_fixed.lance/_transactions/106-14417302-b8d2-4492-bcc2-604fecc9a83a.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/107-34ea30c3-2738-4988-9452-cd456d44b94f.txn b/.lancedb/model1_fixed.lance/_transactions/107-34ea30c3-2738-4988-9452-cd456d44b94f.txn new file mode 100644 index 0000000000000000000000000000000000000000..f1a7885e4806a5b7b74d6d9eed5263a2f605cc75 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/107-34ea30c3-2738-4988-9452-cd456d44b94f.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/108-4fac04b4-ad34-44c3-a6c2-48c86ab521d8.txn b/.lancedb/model1_fixed.lance/_transactions/108-4fac04b4-ad34-44c3-a6c2-48c86ab521d8.txn new file mode 100644 index 0000000000000000000000000000000000000000..c5a5ed6990a9fc53492ebb6511d0edc0ff180e17 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/108-4fac04b4-ad34-44c3-a6c2-48c86ab521d8.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/109-b63e4361-947b-43f8-ad1d-597662c05ddb.txn b/.lancedb/model1_fixed.lance/_transactions/109-b63e4361-947b-43f8-ad1d-597662c05ddb.txn new file mode 100644 index 0000000000000000000000000000000000000000..e98506f891532d3b220e214f4e85007c617cfdc5 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/109-b63e4361-947b-43f8-ad1d-597662c05ddb.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/11-0265e9d3-8851-4406-9537-141757900b3c.txn b/.lancedb/model1_fixed.lance/_transactions/11-0265e9d3-8851-4406-9537-141757900b3c.txn new file mode 100644 index 0000000000000000000000000000000000000000..91e6afbc2c09de620a4e61715cfd609d156f2b57 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/11-0265e9d3-8851-4406-9537-141757900b3c.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/110-77560f7a-a26f-45f1-a262-f761d43c8d99.txn b/.lancedb/model1_fixed.lance/_transactions/110-77560f7a-a26f-45f1-a262-f761d43c8d99.txn new file mode 100644 index 0000000000000000000000000000000000000000..d9b515cce39a6f6fdb2747e602cee684ccfe3765 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/110-77560f7a-a26f-45f1-a262-f761d43c8d99.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/111-94aafcdf-74eb-4b23-99e0-9645081601a4.txn b/.lancedb/model1_fixed.lance/_transactions/111-94aafcdf-74eb-4b23-99e0-9645081601a4.txn new file mode 100644 index 0000000000000000000000000000000000000000..41b069b8346ca2c7f30944a1a0287da19c537f67 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/111-94aafcdf-74eb-4b23-99e0-9645081601a4.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/112-84c0bd2c-a8b2-4cfa-853c-73da2f5fba97.txn b/.lancedb/model1_fixed.lance/_transactions/112-84c0bd2c-a8b2-4cfa-853c-73da2f5fba97.txn new file mode 100644 index 0000000000000000000000000000000000000000..0b05cb9d759c36f69000f36a21d538b38f532719 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/112-84c0bd2c-a8b2-4cfa-853c-73da2f5fba97.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/113-d2158477-e799-4c4d-b297-2a202ad083c5.txn b/.lancedb/model1_fixed.lance/_transactions/113-d2158477-e799-4c4d-b297-2a202ad083c5.txn new file mode 100644 index 0000000000000000000000000000000000000000..77131576e559b628e49e2468f9ac64ea8753df72 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/113-d2158477-e799-4c4d-b297-2a202ad083c5.txn differ diff --git 
a/.lancedb/model1_fixed.lance/_transactions/114-2b19a3e4-fb76-4b17-af0e-d5c2260974f8.txn b/.lancedb/model1_fixed.lance/_transactions/114-2b19a3e4-fb76-4b17-af0e-d5c2260974f8.txn new file mode 100644 index 0000000000000000000000000000000000000000..d2a4cb2b6d56fbd1456b39b3ffdc5171b11b886c Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/114-2b19a3e4-fb76-4b17-af0e-d5c2260974f8.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/115-067cc079-1baa-4d99-962e-466cbcd19e29.txn b/.lancedb/model1_fixed.lance/_transactions/115-067cc079-1baa-4d99-962e-466cbcd19e29.txn new file mode 100644 index 0000000000000000000000000000000000000000..d6f879cad5bc66179ec7fc3f9b00b7a91f0d733b Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/115-067cc079-1baa-4d99-962e-466cbcd19e29.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/116-6aedd380-686c-4b41-a5f2-74b7c2d7cbae.txn b/.lancedb/model1_fixed.lance/_transactions/116-6aedd380-686c-4b41-a5f2-74b7c2d7cbae.txn new file mode 100644 index 0000000000000000000000000000000000000000..c5344259e873d89680475a6d4d5fb78188bc54f2 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/116-6aedd380-686c-4b41-a5f2-74b7c2d7cbae.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/117-5a3b3d62-e110-4561-8777-90e5d3c27570.txn b/.lancedb/model1_fixed.lance/_transactions/117-5a3b3d62-e110-4561-8777-90e5d3c27570.txn new file mode 100644 index 0000000000000000000000000000000000000000..a07e17aab74bfa49ae945695e71f077343117f56 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/117-5a3b3d62-e110-4561-8777-90e5d3c27570.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/118-c7d4ff17-4972-4f7c-bd95-bd8a00120772.txn b/.lancedb/model1_fixed.lance/_transactions/118-c7d4ff17-4972-4f7c-bd95-bd8a00120772.txn new file mode 100644 index 0000000000000000000000000000000000000000..e2e2abe92797ef12ad5afe88f46d80d127984329 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/118-c7d4ff17-4972-4f7c-bd95-bd8a00120772.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/119-7dde3c8d-27bb-4695-b830-e4dadb6ca518.txn b/.lancedb/model1_fixed.lance/_transactions/119-7dde3c8d-27bb-4695-b830-e4dadb6ca518.txn new file mode 100644 index 0000000000000000000000000000000000000000..be9d0ece01daeb17991a3acd73c170d4f2f7203a Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/119-7dde3c8d-27bb-4695-b830-e4dadb6ca518.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/12-bceb5d00-6d53-4216-bfc3-303991353a60.txn b/.lancedb/model1_fixed.lance/_transactions/12-bceb5d00-6d53-4216-bfc3-303991353a60.txn new file mode 100644 index 0000000000000000000000000000000000000000..491c570618db5b7896a8b0e75ab704d9da732686 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/12-bceb5d00-6d53-4216-bfc3-303991353a60.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/120-0856b97f-9acf-437c-8732-030120449e80.txn b/.lancedb/model1_fixed.lance/_transactions/120-0856b97f-9acf-437c-8732-030120449e80.txn new file mode 100644 index 0000000000000000000000000000000000000000..b73d223b96ce2ae8cdd4e18da75776ba6679486d Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/120-0856b97f-9acf-437c-8732-030120449e80.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/121-10843277-3346-4595-85d1-fdc527a1ccb6.txn b/.lancedb/model1_fixed.lance/_transactions/121-10843277-3346-4595-85d1-fdc527a1ccb6.txn new 
file mode 100644 index 0000000000000000000000000000000000000000..e4eea3f4ec69232d3985267a2508deaf5eca3d9a Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/121-10843277-3346-4595-85d1-fdc527a1ccb6.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/122-cba70509-df29-4811-86bb-a89144ea1b37.txn b/.lancedb/model1_fixed.lance/_transactions/122-cba70509-df29-4811-86bb-a89144ea1b37.txn new file mode 100644 index 0000000000000000000000000000000000000000..ad4b4d9ba613705ab1de07005a7b20ca8d8f0f21 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/122-cba70509-df29-4811-86bb-a89144ea1b37.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/123-9e384283-60d8-4a88-816c-bbb3ee544439.txn b/.lancedb/model1_fixed.lance/_transactions/123-9e384283-60d8-4a88-816c-bbb3ee544439.txn new file mode 100644 index 0000000000000000000000000000000000000000..70084ed546e54ce0dcf680aa56e098d440d79968 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/123-9e384283-60d8-4a88-816c-bbb3ee544439.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/124-f5231538-b4a1-44af-8ab3-deefda5da025.txn b/.lancedb/model1_fixed.lance/_transactions/124-f5231538-b4a1-44af-8ab3-deefda5da025.txn new file mode 100644 index 0000000000000000000000000000000000000000..85b391719291b3ac5ae981ea0a1b162c575baab4 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/124-f5231538-b4a1-44af-8ab3-deefda5da025.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/125-3102c01b-9e23-4c9b-a7fc-443b3e4c2c51.txn b/.lancedb/model1_fixed.lance/_transactions/125-3102c01b-9e23-4c9b-a7fc-443b3e4c2c51.txn new file mode 100644 index 0000000000000000000000000000000000000000..9967f825b82c5fe457ad5bc31430909340744945 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/125-3102c01b-9e23-4c9b-a7fc-443b3e4c2c51.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/126-3d112427-9106-4042-9cdb-6d9e8844a00e.txn b/.lancedb/model1_fixed.lance/_transactions/126-3d112427-9106-4042-9cdb-6d9e8844a00e.txn new file mode 100644 index 0000000000000000000000000000000000000000..73130c6990f837734f723b2a38b9f70551b294a4 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/126-3d112427-9106-4042-9cdb-6d9e8844a00e.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/127-bea96de9-61c8-439e-93eb-f255d33eab71.txn b/.lancedb/model1_fixed.lance/_transactions/127-bea96de9-61c8-439e-93eb-f255d33eab71.txn new file mode 100644 index 0000000000000000000000000000000000000000..bf8d2dc5dd093e47756f82b7142dabf49004a394 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/127-bea96de9-61c8-439e-93eb-f255d33eab71.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/128-c7f5ec68-2e35-47a4-bff3-02cf5ed60781.txn b/.lancedb/model1_fixed.lance/_transactions/128-c7f5ec68-2e35-47a4-bff3-02cf5ed60781.txn new file mode 100644 index 0000000000000000000000000000000000000000..bd50897f251fc0dc802cc774c54f8cf17f02f6b3 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/128-c7f5ec68-2e35-47a4-bff3-02cf5ed60781.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/13-94cc5618-4b98-498d-a588-ca803d316dfd.txn b/.lancedb/model1_fixed.lance/_transactions/13-94cc5618-4b98-498d-a588-ca803d316dfd.txn new file mode 100644 index 0000000000000000000000000000000000000000..7f1699baacdf6153ba05ad497e808ed8a545dabd Binary files /dev/null and 
b/.lancedb/model1_fixed.lance/_transactions/13-94cc5618-4b98-498d-a588-ca803d316dfd.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/14-e7a45889-d809-498c-83f7-68594384a636.txn b/.lancedb/model1_fixed.lance/_transactions/14-e7a45889-d809-498c-83f7-68594384a636.txn new file mode 100644 index 0000000000000000000000000000000000000000..df1c4738146e2e69dcef72ee56af59787366a85c Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/14-e7a45889-d809-498c-83f7-68594384a636.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/15-8d13d72e-c4ba-4fa5-8e3d-b40f0837c8ea.txn b/.lancedb/model1_fixed.lance/_transactions/15-8d13d72e-c4ba-4fa5-8e3d-b40f0837c8ea.txn new file mode 100644 index 0000000000000000000000000000000000000000..db9949d214ce4fef735841f8d4dee33325e8f788 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/15-8d13d72e-c4ba-4fa5-8e3d-b40f0837c8ea.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/16-e83f15e1-5663-4e2f-8ef9-a07bf1b867cb.txn b/.lancedb/model1_fixed.lance/_transactions/16-e83f15e1-5663-4e2f-8ef9-a07bf1b867cb.txn new file mode 100644 index 0000000000000000000000000000000000000000..49e153abd47b645c8555bbcb7e18ee9609afc43d Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/16-e83f15e1-5663-4e2f-8ef9-a07bf1b867cb.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/17-5cb2032a-1b4d-48cd-8492-f8e17eaa0112.txn b/.lancedb/model1_fixed.lance/_transactions/17-5cb2032a-1b4d-48cd-8492-f8e17eaa0112.txn new file mode 100644 index 0000000000000000000000000000000000000000..ae174ae9fc1491f80c7f9d4c5112c7ccaf4fbb5d Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/17-5cb2032a-1b4d-48cd-8492-f8e17eaa0112.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/18-621f16a3-ca0d-4274-bd08-8c7836098059.txn b/.lancedb/model1_fixed.lance/_transactions/18-621f16a3-ca0d-4274-bd08-8c7836098059.txn new file mode 100644 index 0000000000000000000000000000000000000000..de0581a0ede10fdb9533d29bf778c6ccdb38b13a Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/18-621f16a3-ca0d-4274-bd08-8c7836098059.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/19-498b40c4-7979-4408-9f92-2c4b55ff1116.txn b/.lancedb/model1_fixed.lance/_transactions/19-498b40c4-7979-4408-9f92-2c4b55ff1116.txn new file mode 100644 index 0000000000000000000000000000000000000000..41164a951f81b11614e188aeb4b2245cbe29a6cb Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/19-498b40c4-7979-4408-9f92-2c4b55ff1116.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/2-83a6fc76-8e1b-4561-82d9-8d099069ab23.txn b/.lancedb/model1_fixed.lance/_transactions/2-83a6fc76-8e1b-4561-82d9-8d099069ab23.txn new file mode 100644 index 0000000000000000000000000000000000000000..16a3d88cd761e038187e2ad311bfa0a540e4ce07 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/2-83a6fc76-8e1b-4561-82d9-8d099069ab23.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/20-06934645-ff50-4ef7-a98a-cf4cac824f56.txn b/.lancedb/model1_fixed.lance/_transactions/20-06934645-ff50-4ef7-a98a-cf4cac824f56.txn new file mode 100644 index 0000000000000000000000000000000000000000..c9b1d61b7531ae4162f22c0961f825230146376b Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/20-06934645-ff50-4ef7-a98a-cf4cac824f56.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/21-99c341a7-2763-4321-b5e3-488611fc00b6.txn 
b/.lancedb/model1_fixed.lance/_transactions/21-99c341a7-2763-4321-b5e3-488611fc00b6.txn new file mode 100644 index 0000000000000000000000000000000000000000..3aa6b02720335f704690575da40daf2b16356b22 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/21-99c341a7-2763-4321-b5e3-488611fc00b6.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/22-22c179d6-0030-47e3-98ed-48ce13ff0bc5.txn b/.lancedb/model1_fixed.lance/_transactions/22-22c179d6-0030-47e3-98ed-48ce13ff0bc5.txn new file mode 100644 index 0000000000000000000000000000000000000000..e996d78ce7d03861e783b339f8073c2354c4234a Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/22-22c179d6-0030-47e3-98ed-48ce13ff0bc5.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/23-0baa10b4-2410-48b4-a3b1-f16c758da076.txn b/.lancedb/model1_fixed.lance/_transactions/23-0baa10b4-2410-48b4-a3b1-f16c758da076.txn new file mode 100644 index 0000000000000000000000000000000000000000..38abcf9b333a7ae40b4634d86c8d26dc5c48cc22 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/23-0baa10b4-2410-48b4-a3b1-f16c758da076.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/24-856a71e2-54d5-41ed-bc1e-fe8963fe2912.txn b/.lancedb/model1_fixed.lance/_transactions/24-856a71e2-54d5-41ed-bc1e-fe8963fe2912.txn new file mode 100644 index 0000000000000000000000000000000000000000..bf7d1d2f9565f7c3286ed6d8c0dd34f1f1c17077 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/24-856a71e2-54d5-41ed-bc1e-fe8963fe2912.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/25-49bf907f-4029-424c-bf66-1b9900dbf393.txn b/.lancedb/model1_fixed.lance/_transactions/25-49bf907f-4029-424c-bf66-1b9900dbf393.txn new file mode 100644 index 0000000000000000000000000000000000000000..7c898f7d59e95fd6155e2205d34f65a8e900a718 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/25-49bf907f-4029-424c-bf66-1b9900dbf393.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/26-15769fd6-ccbe-40f2-b1a4-c85ed8b83feb.txn b/.lancedb/model1_fixed.lance/_transactions/26-15769fd6-ccbe-40f2-b1a4-c85ed8b83feb.txn new file mode 100644 index 0000000000000000000000000000000000000000..b1ac9d7e4e994ddeccb33a2b8b8eccdd0dd7df15 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/26-15769fd6-ccbe-40f2-b1a4-c85ed8b83feb.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/27-e4041544-f578-4e0f-b62a-84889443b1d7.txn b/.lancedb/model1_fixed.lance/_transactions/27-e4041544-f578-4e0f-b62a-84889443b1d7.txn new file mode 100644 index 0000000000000000000000000000000000000000..a0eb07dff04f336c27627069f7572c24b74df2bf Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/27-e4041544-f578-4e0f-b62a-84889443b1d7.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/28-a1b6c288-acd4-4e8e-a231-4b28eff7c098.txn b/.lancedb/model1_fixed.lance/_transactions/28-a1b6c288-acd4-4e8e-a231-4b28eff7c098.txn new file mode 100644 index 0000000000000000000000000000000000000000..92c686f12005b0487e781017b943733a22be75e3 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/28-a1b6c288-acd4-4e8e-a231-4b28eff7c098.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/29-3ed0ad81-0468-44bd-8cfc-9d9dc34e2259.txn b/.lancedb/model1_fixed.lance/_transactions/29-3ed0ad81-0468-44bd-8cfc-9d9dc34e2259.txn new file mode 100644 index 0000000000000000000000000000000000000000..47341028acf6fa7c2c80f30f03e189ade0303bfb 
Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/29-3ed0ad81-0468-44bd-8cfc-9d9dc34e2259.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/3-90534772-1cd0-4a23-b258-cd835e2de65a.txn b/.lancedb/model1_fixed.lance/_transactions/3-90534772-1cd0-4a23-b258-cd835e2de65a.txn new file mode 100644 index 0000000000000000000000000000000000000000..6b86199741d2de1075e4465085f4e0eedb508306 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/3-90534772-1cd0-4a23-b258-cd835e2de65a.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/30-26420bf1-9a55-43b6-a916-7861259517d6.txn b/.lancedb/model1_fixed.lance/_transactions/30-26420bf1-9a55-43b6-a916-7861259517d6.txn new file mode 100644 index 0000000000000000000000000000000000000000..fd4bb83eb25800c14d75f6a39f3f00f9ff69d4d6 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/30-26420bf1-9a55-43b6-a916-7861259517d6.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/31-734e852b-9897-47d5-982e-b6f67562543f.txn b/.lancedb/model1_fixed.lance/_transactions/31-734e852b-9897-47d5-982e-b6f67562543f.txn new file mode 100644 index 0000000000000000000000000000000000000000..2ce0944dfc2188aebad6447afe5699d4bfec8cf6 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/31-734e852b-9897-47d5-982e-b6f67562543f.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/32-c2ad38b9-3f37-41ac-87e0-299a7c5d21c9.txn b/.lancedb/model1_fixed.lance/_transactions/32-c2ad38b9-3f37-41ac-87e0-299a7c5d21c9.txn new file mode 100644 index 0000000000000000000000000000000000000000..03cfe82d4bdfe243571e558b0ff9b20913659b0d Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/32-c2ad38b9-3f37-41ac-87e0-299a7c5d21c9.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/33-09bde3cf-4e75-44c2-9ead-87d91e1ecc15.txn b/.lancedb/model1_fixed.lance/_transactions/33-09bde3cf-4e75-44c2-9ead-87d91e1ecc15.txn new file mode 100644 index 0000000000000000000000000000000000000000..9f11962c54a63ee51d3ce852b3f6e01f6903951e Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/33-09bde3cf-4e75-44c2-9ead-87d91e1ecc15.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/34-a493cf8a-f042-491d-90b7-44b10872b7ca.txn b/.lancedb/model1_fixed.lance/_transactions/34-a493cf8a-f042-491d-90b7-44b10872b7ca.txn new file mode 100644 index 0000000000000000000000000000000000000000..d55a8472fecd8cb80ea4ae0bc32cebef3a4a90a1 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/34-a493cf8a-f042-491d-90b7-44b10872b7ca.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/35-20b5e628-c9ff-4170-ad02-a088be6a760d.txn b/.lancedb/model1_fixed.lance/_transactions/35-20b5e628-c9ff-4170-ad02-a088be6a760d.txn new file mode 100644 index 0000000000000000000000000000000000000000..802aaea90755b7839323f1a34707a92a895b2434 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/35-20b5e628-c9ff-4170-ad02-a088be6a760d.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/36-a90e2e0d-4d23-4757-964f-7020d92bc4ac.txn b/.lancedb/model1_fixed.lance/_transactions/36-a90e2e0d-4d23-4757-964f-7020d92bc4ac.txn new file mode 100644 index 0000000000000000000000000000000000000000..0952b08675e9f6bf8cafa43e3d5b65c15ed06207 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/36-a90e2e0d-4d23-4757-964f-7020d92bc4ac.txn differ diff --git 
a/.lancedb/model1_fixed.lance/_transactions/37-ee77517b-cb11-43ae-bdd7-bae53fe78074.txn b/.lancedb/model1_fixed.lance/_transactions/37-ee77517b-cb11-43ae-bdd7-bae53fe78074.txn new file mode 100644 index 0000000000000000000000000000000000000000..3eff9a54508a78011e6e6da602dfd658c6ad56ba Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/37-ee77517b-cb11-43ae-bdd7-bae53fe78074.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/38-577eb53c-66f3-494b-b6fd-b51a25d21af0.txn b/.lancedb/model1_fixed.lance/_transactions/38-577eb53c-66f3-494b-b6fd-b51a25d21af0.txn new file mode 100644 index 0000000000000000000000000000000000000000..0550ef64b297c635fd55f80e97833ad31dbb077a Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/38-577eb53c-66f3-494b-b6fd-b51a25d21af0.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/39-067bc589-cce4-4e90-927f-f8d7f9477669.txn b/.lancedb/model1_fixed.lance/_transactions/39-067bc589-cce4-4e90-927f-f8d7f9477669.txn new file mode 100644 index 0000000000000000000000000000000000000000..c29e9d9ccca47668b8f5680d261632d977ac67c8 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/39-067bc589-cce4-4e90-927f-f8d7f9477669.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/4-abbee7ea-d1d5-4ea7-acda-52cb3e15263f.txn b/.lancedb/model1_fixed.lance/_transactions/4-abbee7ea-d1d5-4ea7-acda-52cb3e15263f.txn new file mode 100644 index 0000000000000000000000000000000000000000..903f167b5d87290c03d081cd5f49635668ee5ef3 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/4-abbee7ea-d1d5-4ea7-acda-52cb3e15263f.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/40-7bcb47bf-eb7c-4d03-9f2a-c462da336d59.txn b/.lancedb/model1_fixed.lance/_transactions/40-7bcb47bf-eb7c-4d03-9f2a-c462da336d59.txn new file mode 100644 index 0000000000000000000000000000000000000000..890da5b8524839785083233191c5f4066571449e Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/40-7bcb47bf-eb7c-4d03-9f2a-c462da336d59.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/41-e2254ba3-45dd-41df-bccc-2494b91fd5f0.txn b/.lancedb/model1_fixed.lance/_transactions/41-e2254ba3-45dd-41df-bccc-2494b91fd5f0.txn new file mode 100644 index 0000000000000000000000000000000000000000..d9542c856b42e22b0eb8acd948357d4a7de84213 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/41-e2254ba3-45dd-41df-bccc-2494b91fd5f0.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/42-373fa3e1-afb6-416b-9968-9e7c1f4e0366.txn b/.lancedb/model1_fixed.lance/_transactions/42-373fa3e1-afb6-416b-9968-9e7c1f4e0366.txn new file mode 100644 index 0000000000000000000000000000000000000000..5fe7c8ee34e15330fb28b26d53e11b2736d7583b Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/42-373fa3e1-afb6-416b-9968-9e7c1f4e0366.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/43-22a5da69-2198-40c1-a053-ef7e1334ed05.txn b/.lancedb/model1_fixed.lance/_transactions/43-22a5da69-2198-40c1-a053-ef7e1334ed05.txn new file mode 100644 index 0000000000000000000000000000000000000000..3dc531f0fab5753d1b4c01d3a430449145bbbbc5 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/43-22a5da69-2198-40c1-a053-ef7e1334ed05.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/44-f12f6b50-8f74-4f8f-8629-0870dfd5955c.txn b/.lancedb/model1_fixed.lance/_transactions/44-f12f6b50-8f74-4f8f-8629-0870dfd5955c.txn new file mode 100644 index 
0000000000000000000000000000000000000000..fbc57bfdc5ccf68587274de49d3e81a281a15cfd Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/44-f12f6b50-8f74-4f8f-8629-0870dfd5955c.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/45-065799de-d90b-48b9-a1b1-99f0baffd0c6.txn b/.lancedb/model1_fixed.lance/_transactions/45-065799de-d90b-48b9-a1b1-99f0baffd0c6.txn new file mode 100644 index 0000000000000000000000000000000000000000..b6f8b21dc74df846fc8698361724fbe77cb0f2a8 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/45-065799de-d90b-48b9-a1b1-99f0baffd0c6.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/46-c1fc1ade-9d7e-48d1-bfc7-adb730d1a8fd.txn b/.lancedb/model1_fixed.lance/_transactions/46-c1fc1ade-9d7e-48d1-bfc7-adb730d1a8fd.txn new file mode 100644 index 0000000000000000000000000000000000000000..f1d9c57538f01fd1f599a48e052bf889f960d42f Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/46-c1fc1ade-9d7e-48d1-bfc7-adb730d1a8fd.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/47-99cabc87-f5f1-4fcf-9570-d117f10c62f4.txn b/.lancedb/model1_fixed.lance/_transactions/47-99cabc87-f5f1-4fcf-9570-d117f10c62f4.txn new file mode 100644 index 0000000000000000000000000000000000000000..59876489158812d791e79dc1831cc74000879291 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/47-99cabc87-f5f1-4fcf-9570-d117f10c62f4.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/48-bde790b3-e086-4e99-9fa8-19aa72f3e0e3.txn b/.lancedb/model1_fixed.lance/_transactions/48-bde790b3-e086-4e99-9fa8-19aa72f3e0e3.txn new file mode 100644 index 0000000000000000000000000000000000000000..28f5831986ce6a097d9888b1b0aa866d983151fb Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/48-bde790b3-e086-4e99-9fa8-19aa72f3e0e3.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/49-10da3fb7-bd60-4073-9744-25b96e77da4b.txn b/.lancedb/model1_fixed.lance/_transactions/49-10da3fb7-bd60-4073-9744-25b96e77da4b.txn new file mode 100644 index 0000000000000000000000000000000000000000..f9033e6f95ad617c6888860d552435c66f8f3f54 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/49-10da3fb7-bd60-4073-9744-25b96e77da4b.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/5-a1f0522c-8b30-498d-b55d-e4adbb8086ac.txn b/.lancedb/model1_fixed.lance/_transactions/5-a1f0522c-8b30-498d-b55d-e4adbb8086ac.txn new file mode 100644 index 0000000000000000000000000000000000000000..5cec3efcdded0455cce9cacda8c67bc4e93f768e Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/5-a1f0522c-8b30-498d-b55d-e4adbb8086ac.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/50-6dfb19d2-3ea9-47c9-8d84-a63158c7534f.txn b/.lancedb/model1_fixed.lance/_transactions/50-6dfb19d2-3ea9-47c9-8d84-a63158c7534f.txn new file mode 100644 index 0000000000000000000000000000000000000000..2405324a3e02cbeb11518faec847af3fb65b4d3c Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/50-6dfb19d2-3ea9-47c9-8d84-a63158c7534f.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/51-107024d0-6a89-4efe-95a5-9c9f120aa483.txn b/.lancedb/model1_fixed.lance/_transactions/51-107024d0-6a89-4efe-95a5-9c9f120aa483.txn new file mode 100644 index 0000000000000000000000000000000000000000..747969e74bc68282e84e75182f564a9f4ba67da6 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/51-107024d0-6a89-4efe-95a5-9c9f120aa483.txn differ 
diff --git a/.lancedb/model1_fixed.lance/_transactions/52-24239910-c4ba-47a6-ac9d-ce28f10543f8.txn b/.lancedb/model1_fixed.lance/_transactions/52-24239910-c4ba-47a6-ac9d-ce28f10543f8.txn new file mode 100644 index 0000000000000000000000000000000000000000..3ac987b5464f3ec3d3044f146d1ec3841d6cdc5b Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/52-24239910-c4ba-47a6-ac9d-ce28f10543f8.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/53-48d86c43-61eb-42d2-a79a-c01bda44614c.txn b/.lancedb/model1_fixed.lance/_transactions/53-48d86c43-61eb-42d2-a79a-c01bda44614c.txn new file mode 100644 index 0000000000000000000000000000000000000000..59940b9720acbbb4c22152e168f0a640414750e4 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/53-48d86c43-61eb-42d2-a79a-c01bda44614c.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/54-ed6c064b-063e-44ef-a019-c7178350ad53.txn b/.lancedb/model1_fixed.lance/_transactions/54-ed6c064b-063e-44ef-a019-c7178350ad53.txn new file mode 100644 index 0000000000000000000000000000000000000000..543443e80c9715fc24518142ed787eddba731554 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/54-ed6c064b-063e-44ef-a019-c7178350ad53.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/55-5b489b35-b787-4fff-bc79-85b6da25659c.txn b/.lancedb/model1_fixed.lance/_transactions/55-5b489b35-b787-4fff-bc79-85b6da25659c.txn new file mode 100644 index 0000000000000000000000000000000000000000..ba15233e24fa06be0b973a1f08759d3053483fdc Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/55-5b489b35-b787-4fff-bc79-85b6da25659c.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/56-f38c83db-94ec-4eb9-990a-7ee5193e296d.txn b/.lancedb/model1_fixed.lance/_transactions/56-f38c83db-94ec-4eb9-990a-7ee5193e296d.txn new file mode 100644 index 0000000000000000000000000000000000000000..f9e4380196797c2997d5597b0bd51aabe9e032d3 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/56-f38c83db-94ec-4eb9-990a-7ee5193e296d.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/57-b55bd261-72e7-4407-aeb0-f4319c72865d.txn b/.lancedb/model1_fixed.lance/_transactions/57-b55bd261-72e7-4407-aeb0-f4319c72865d.txn new file mode 100644 index 0000000000000000000000000000000000000000..d2d80628cc3de16da19b7a1ccb8be31b66d485fe Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/57-b55bd261-72e7-4407-aeb0-f4319c72865d.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/58-a104b845-a3b9-4d92-9a91-7781cb862200.txn b/.lancedb/model1_fixed.lance/_transactions/58-a104b845-a3b9-4d92-9a91-7781cb862200.txn new file mode 100644 index 0000000000000000000000000000000000000000..37f9c8423b4e481c03bd3fd3cf106e8d582d2074 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/58-a104b845-a3b9-4d92-9a91-7781cb862200.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/59-0272e3bf-cb32-4c1a-a6e0-4db7d9d61e03.txn b/.lancedb/model1_fixed.lance/_transactions/59-0272e3bf-cb32-4c1a-a6e0-4db7d9d61e03.txn new file mode 100644 index 0000000000000000000000000000000000000000..0540034a74b47434ccbcdbd3bbfde234531b1c2a Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/59-0272e3bf-cb32-4c1a-a6e0-4db7d9d61e03.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/6-28b14d6d-5b10-4dc2-b6db-216a908c8991.txn b/.lancedb/model1_fixed.lance/_transactions/6-28b14d6d-5b10-4dc2-b6db-216a908c8991.txn new file mode 
100644 index 0000000000000000000000000000000000000000..689a18f25041f4303b6b158375cbb578edefef0f Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/6-28b14d6d-5b10-4dc2-b6db-216a908c8991.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/60-31ddf154-c8cb-406e-8576-f7826dfdf8f9.txn b/.lancedb/model1_fixed.lance/_transactions/60-31ddf154-c8cb-406e-8576-f7826dfdf8f9.txn new file mode 100644 index 0000000000000000000000000000000000000000..5bec4967b47b98863f4101c6efeda82da55b079b Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/60-31ddf154-c8cb-406e-8576-f7826dfdf8f9.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/61-e324eae7-75ef-4dbf-93d6-2048154195a6.txn b/.lancedb/model1_fixed.lance/_transactions/61-e324eae7-75ef-4dbf-93d6-2048154195a6.txn new file mode 100644 index 0000000000000000000000000000000000000000..fa669df3d97b860f354fc792a8ad0f13b3247170 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/61-e324eae7-75ef-4dbf-93d6-2048154195a6.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/62-ef64d750-1701-4023-8975-ef64dcd8b0ce.txn b/.lancedb/model1_fixed.lance/_transactions/62-ef64d750-1701-4023-8975-ef64dcd8b0ce.txn new file mode 100644 index 0000000000000000000000000000000000000000..7a69e69f056e87af4503736f8ebb7710d1fd7a71 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/62-ef64d750-1701-4023-8975-ef64dcd8b0ce.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/63-ee999767-243d-40e6-8bf2-b496e97ebcd7.txn b/.lancedb/model1_fixed.lance/_transactions/63-ee999767-243d-40e6-8bf2-b496e97ebcd7.txn new file mode 100644 index 0000000000000000000000000000000000000000..16e2d8afbb34b7c96518b20cb77c9966d866b292 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/63-ee999767-243d-40e6-8bf2-b496e97ebcd7.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/64-7a297e75-25ba-4075-9caf-0581542c0277.txn b/.lancedb/model1_fixed.lance/_transactions/64-7a297e75-25ba-4075-9caf-0581542c0277.txn new file mode 100644 index 0000000000000000000000000000000000000000..dc0207a8b03c2ee0bb905cd9c18cc5946baa2793 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/64-7a297e75-25ba-4075-9caf-0581542c0277.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/65-cf8d14c5-2c37-46cf-a41c-934fbfab6b15.txn b/.lancedb/model1_fixed.lance/_transactions/65-cf8d14c5-2c37-46cf-a41c-934fbfab6b15.txn new file mode 100644 index 0000000000000000000000000000000000000000..5962cf4b3d48a3936b4e8fa46227c5fcb09b0a25 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/65-cf8d14c5-2c37-46cf-a41c-934fbfab6b15.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/66-10eb11e4-1dc6-433c-8997-0e48cba97655.txn b/.lancedb/model1_fixed.lance/_transactions/66-10eb11e4-1dc6-433c-8997-0e48cba97655.txn new file mode 100644 index 0000000000000000000000000000000000000000..e152c81eeb0b00e5a5df69fecfa3e979dfb44d5d Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/66-10eb11e4-1dc6-433c-8997-0e48cba97655.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/67-bf5acebf-1a3e-41d2-8b62-3e367968cbc4.txn b/.lancedb/model1_fixed.lance/_transactions/67-bf5acebf-1a3e-41d2-8b62-3e367968cbc4.txn new file mode 100644 index 0000000000000000000000000000000000000000..8418054540f0e44afbb15a014d70067cf06798a7 Binary files /dev/null and 
b/.lancedb/model1_fixed.lance/_transactions/67-bf5acebf-1a3e-41d2-8b62-3e367968cbc4.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/68-568a994f-20a2-4d34-a240-3c941e89de3a.txn b/.lancedb/model1_fixed.lance/_transactions/68-568a994f-20a2-4d34-a240-3c941e89de3a.txn new file mode 100644 index 0000000000000000000000000000000000000000..6b5f670b45171b6753b7d35571863bc84e3bebb8 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/68-568a994f-20a2-4d34-a240-3c941e89de3a.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/69-07230566-572a-4856-82df-2074717a63a3.txn b/.lancedb/model1_fixed.lance/_transactions/69-07230566-572a-4856-82df-2074717a63a3.txn new file mode 100644 index 0000000000000000000000000000000000000000..9fc63f143a23389b77e688240d61e38721e336fd Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/69-07230566-572a-4856-82df-2074717a63a3.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/7-c89411d8-1492-4f09-84a1-b9d0ac22fbdf.txn b/.lancedb/model1_fixed.lance/_transactions/7-c89411d8-1492-4f09-84a1-b9d0ac22fbdf.txn new file mode 100644 index 0000000000000000000000000000000000000000..65573683b2c62a2ab72d6425a1abf1db9e923d4e Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/7-c89411d8-1492-4f09-84a1-b9d0ac22fbdf.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/70-ca49882b-b720-47f9-bae7-fe3b01e3f4ec.txn b/.lancedb/model1_fixed.lance/_transactions/70-ca49882b-b720-47f9-bae7-fe3b01e3f4ec.txn new file mode 100644 index 0000000000000000000000000000000000000000..667626b60eb2cfbf55aa41bd9b7677fff28a88aa Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/70-ca49882b-b720-47f9-bae7-fe3b01e3f4ec.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/71-0e1b49c9-9dc2-4b1f-92fa-cfbed5048793.txn b/.lancedb/model1_fixed.lance/_transactions/71-0e1b49c9-9dc2-4b1f-92fa-cfbed5048793.txn new file mode 100644 index 0000000000000000000000000000000000000000..72b3d75bf83e496ea8fd25ef7b8ff42f86c8c05c Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/71-0e1b49c9-9dc2-4b1f-92fa-cfbed5048793.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/72-36c675a4-f2bb-44e2-8481-22a4b31145bd.txn b/.lancedb/model1_fixed.lance/_transactions/72-36c675a4-f2bb-44e2-8481-22a4b31145bd.txn new file mode 100644 index 0000000000000000000000000000000000000000..dc4d14c8db9fcadd836e2edfd70142a2d7bf99d0 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/72-36c675a4-f2bb-44e2-8481-22a4b31145bd.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/73-762d6f9c-9ce7-4f24-8823-a3268f58eafb.txn b/.lancedb/model1_fixed.lance/_transactions/73-762d6f9c-9ce7-4f24-8823-a3268f58eafb.txn new file mode 100644 index 0000000000000000000000000000000000000000..36996290c5355c10d9db2285b3871e68b7970419 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/73-762d6f9c-9ce7-4f24-8823-a3268f58eafb.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/74-b5017a34-be63-4325-aa62-816f3471976e.txn b/.lancedb/model1_fixed.lance/_transactions/74-b5017a34-be63-4325-aa62-816f3471976e.txn new file mode 100644 index 0000000000000000000000000000000000000000..36ee32efb960ecaebc010fb4d55dca792b58ef43 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/74-b5017a34-be63-4325-aa62-816f3471976e.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/75-cf4c3ef7-b1be-4f15-80a3-7c1b65950da5.txn 
b/.lancedb/model1_fixed.lance/_transactions/75-cf4c3ef7-b1be-4f15-80a3-7c1b65950da5.txn new file mode 100644 index 0000000000000000000000000000000000000000..a7f0df5da8b1af3eed9f18a40030483a92259fee Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/75-cf4c3ef7-b1be-4f15-80a3-7c1b65950da5.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/76-0f2622e7-c573-4232-8d79-e7320f3a602c.txn b/.lancedb/model1_fixed.lance/_transactions/76-0f2622e7-c573-4232-8d79-e7320f3a602c.txn new file mode 100644 index 0000000000000000000000000000000000000000..6b471b979923ca3121e1bd122305c6b085f1afc2 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/76-0f2622e7-c573-4232-8d79-e7320f3a602c.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/77-5da0e873-6b58-413d-9b2f-dbcd3e9d605e.txn b/.lancedb/model1_fixed.lance/_transactions/77-5da0e873-6b58-413d-9b2f-dbcd3e9d605e.txn new file mode 100644 index 0000000000000000000000000000000000000000..49dd500530674c58ff20998660ff0c6b0d457df4 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/77-5da0e873-6b58-413d-9b2f-dbcd3e9d605e.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/78-eb54cbd6-48f6-49e8-b171-f90f6ed630a2.txn b/.lancedb/model1_fixed.lance/_transactions/78-eb54cbd6-48f6-49e8-b171-f90f6ed630a2.txn new file mode 100644 index 0000000000000000000000000000000000000000..78962e11c140e78e76a9da690055655f9c9e4f05 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/78-eb54cbd6-48f6-49e8-b171-f90f6ed630a2.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/79-86642220-5fba-4661-b01c-834cb9f5d4b9.txn b/.lancedb/model1_fixed.lance/_transactions/79-86642220-5fba-4661-b01c-834cb9f5d4b9.txn new file mode 100644 index 0000000000000000000000000000000000000000..80f6f7f89264e7b17834be5204962946fd91b64f Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/79-86642220-5fba-4661-b01c-834cb9f5d4b9.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/8-16cab04e-19c6-46da-9ae5-71f19edfe786.txn b/.lancedb/model1_fixed.lance/_transactions/8-16cab04e-19c6-46da-9ae5-71f19edfe786.txn new file mode 100644 index 0000000000000000000000000000000000000000..b750b55ed2a684fdb4fb61d30ffa40386eb31739 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/8-16cab04e-19c6-46da-9ae5-71f19edfe786.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/80-df746d17-3802-4760-9694-01c270c9e776.txn b/.lancedb/model1_fixed.lance/_transactions/80-df746d17-3802-4760-9694-01c270c9e776.txn new file mode 100644 index 0000000000000000000000000000000000000000..a65ab85ed4016a44372136e15edccc3f0909cefc Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/80-df746d17-3802-4760-9694-01c270c9e776.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/81-6b07b0e5-5d89-4aa7-a34d-7fbd0815b5a1.txn b/.lancedb/model1_fixed.lance/_transactions/81-6b07b0e5-5d89-4aa7-a34d-7fbd0815b5a1.txn new file mode 100644 index 0000000000000000000000000000000000000000..ba284072e7916c2c69db0d11fb580672f389eb95 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/81-6b07b0e5-5d89-4aa7-a34d-7fbd0815b5a1.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/82-6e3adb17-3377-45dd-b109-0403e8fc4cf5.txn b/.lancedb/model1_fixed.lance/_transactions/82-6e3adb17-3377-45dd-b109-0403e8fc4cf5.txn new file mode 100644 index 0000000000000000000000000000000000000000..2e0db06a6b57e48c77e3c281776e7c8c6e41cf2b Binary 
files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/82-6e3adb17-3377-45dd-b109-0403e8fc4cf5.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/83-8f8f9de7-4274-48b5-a9c3-8c0f4dfc8bb8.txn b/.lancedb/model1_fixed.lance/_transactions/83-8f8f9de7-4274-48b5-a9c3-8c0f4dfc8bb8.txn new file mode 100644 index 0000000000000000000000000000000000000000..9633887e40af73fb2be11cbcc709b1f517ef8519 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/83-8f8f9de7-4274-48b5-a9c3-8c0f4dfc8bb8.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/84-6478baa4-6285-4d30-81ea-818de0c75f41.txn b/.lancedb/model1_fixed.lance/_transactions/84-6478baa4-6285-4d30-81ea-818de0c75f41.txn new file mode 100644 index 0000000000000000000000000000000000000000..36e89f287d9b155ee94e6d893f64d6ea2a4ad76c Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/84-6478baa4-6285-4d30-81ea-818de0c75f41.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/85-d9a17a3a-bf79-478b-be24-03c8876f2540.txn b/.lancedb/model1_fixed.lance/_transactions/85-d9a17a3a-bf79-478b-be24-03c8876f2540.txn new file mode 100644 index 0000000000000000000000000000000000000000..4b55f16986347cd4bf811dd1dca439445776607d Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/85-d9a17a3a-bf79-478b-be24-03c8876f2540.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/86-77a28939-95b7-46e5-b252-ff84f2317185.txn b/.lancedb/model1_fixed.lance/_transactions/86-77a28939-95b7-46e5-b252-ff84f2317185.txn new file mode 100644 index 0000000000000000000000000000000000000000..b4bd7a24ff1963d7cc7601477a17ed70c0b6cc96 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/86-77a28939-95b7-46e5-b252-ff84f2317185.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/87-079e313f-84cb-4c9e-ad12-54d127733dda.txn b/.lancedb/model1_fixed.lance/_transactions/87-079e313f-84cb-4c9e-ad12-54d127733dda.txn new file mode 100644 index 0000000000000000000000000000000000000000..c91577540a23c6497c69fb4a2727890399c7deb5 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/87-079e313f-84cb-4c9e-ad12-54d127733dda.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/88-58f02408-e3b4-45b1-9811-0cda4bdef531.txn b/.lancedb/model1_fixed.lance/_transactions/88-58f02408-e3b4-45b1-9811-0cda4bdef531.txn new file mode 100644 index 0000000000000000000000000000000000000000..024189a865a625e50f6f8245891b7573430cfd15 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/88-58f02408-e3b4-45b1-9811-0cda4bdef531.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/89-c870205e-a3c5-408e-8204-db63d9684e6a.txn b/.lancedb/model1_fixed.lance/_transactions/89-c870205e-a3c5-408e-8204-db63d9684e6a.txn new file mode 100644 index 0000000000000000000000000000000000000000..614b990a46310310aadac9b01e55fc539051d351 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/89-c870205e-a3c5-408e-8204-db63d9684e6a.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/9-ff712526-71bb-415a-9e41-b8278fca3ef1.txn b/.lancedb/model1_fixed.lance/_transactions/9-ff712526-71bb-415a-9e41-b8278fca3ef1.txn new file mode 100644 index 0000000000000000000000000000000000000000..2b24ea00f12fe3540b743da50cace70d19a21ae9 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/9-ff712526-71bb-415a-9e41-b8278fca3ef1.txn differ diff --git 
a/.lancedb/model1_fixed.lance/_transactions/90-eeb9e23d-b574-44b3-98be-2517fc70e4b6.txn b/.lancedb/model1_fixed.lance/_transactions/90-eeb9e23d-b574-44b3-98be-2517fc70e4b6.txn new file mode 100644 index 0000000000000000000000000000000000000000..553d8cd7da831065e5b4be05d872365f4c2fac38 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/90-eeb9e23d-b574-44b3-98be-2517fc70e4b6.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/91-e850cd4a-7c83-48ca-8cf9-099befd5c32b.txn b/.lancedb/model1_fixed.lance/_transactions/91-e850cd4a-7c83-48ca-8cf9-099befd5c32b.txn new file mode 100644 index 0000000000000000000000000000000000000000..caa1c1b058ba31b90ac31f1aa7b139cde93322a4 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/91-e850cd4a-7c83-48ca-8cf9-099befd5c32b.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/92-080464b6-2ac0-4bd4-9964-e51fff290561.txn b/.lancedb/model1_fixed.lance/_transactions/92-080464b6-2ac0-4bd4-9964-e51fff290561.txn new file mode 100644 index 0000000000000000000000000000000000000000..dbbee649888085da7763f88bf57d902e7e5a2b12 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/92-080464b6-2ac0-4bd4-9964-e51fff290561.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/93-ee75bf05-7753-44fc-8d36-7e1a81b11ee2.txn b/.lancedb/model1_fixed.lance/_transactions/93-ee75bf05-7753-44fc-8d36-7e1a81b11ee2.txn new file mode 100644 index 0000000000000000000000000000000000000000..d1d823f4d34a653be420f12a514a85b249a0b979 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/93-ee75bf05-7753-44fc-8d36-7e1a81b11ee2.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/94-684a6305-c0f6-41a4-86de-9a1d03fb388b.txn b/.lancedb/model1_fixed.lance/_transactions/94-684a6305-c0f6-41a4-86de-9a1d03fb388b.txn new file mode 100644 index 0000000000000000000000000000000000000000..14fbb219f18b9be42a95cf337f06337b79e0f3a2 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/94-684a6305-c0f6-41a4-86de-9a1d03fb388b.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/95-a69200b4-18dd-4536-a446-3709b67900ec.txn b/.lancedb/model1_fixed.lance/_transactions/95-a69200b4-18dd-4536-a446-3709b67900ec.txn new file mode 100644 index 0000000000000000000000000000000000000000..14fe91c573a86297782639baef8d43ee78c9c7b9 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/95-a69200b4-18dd-4536-a446-3709b67900ec.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/96-218e9ef4-56c2-4d20-b5ad-56dd98ffd153.txn b/.lancedb/model1_fixed.lance/_transactions/96-218e9ef4-56c2-4d20-b5ad-56dd98ffd153.txn new file mode 100644 index 0000000000000000000000000000000000000000..49dfda20da6fff3bc9e00a1f4d8128154b1639fa Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/96-218e9ef4-56c2-4d20-b5ad-56dd98ffd153.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/97-93556346-a910-465f-b0a2-1ed686d9b661.txn b/.lancedb/model1_fixed.lance/_transactions/97-93556346-a910-465f-b0a2-1ed686d9b661.txn new file mode 100644 index 0000000000000000000000000000000000000000..da97c75aac15fa7334fb5066da0db3396517ae48 Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/97-93556346-a910-465f-b0a2-1ed686d9b661.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/98-df2ece44-a442-43f7-9d61-9ff7d4f1ee37.txn b/.lancedb/model1_fixed.lance/_transactions/98-df2ece44-a442-43f7-9d61-9ff7d4f1ee37.txn new file mode 100644 index 
0000000000000000000000000000000000000000..55209376eab91b9ddea94fc0c5db9246a6b94f5a Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/98-df2ece44-a442-43f7-9d61-9ff7d4f1ee37.txn differ diff --git a/.lancedb/model1_fixed.lance/_transactions/99-bfc76eb2-b6c7-4f1a-82bb-e75da991679a.txn b/.lancedb/model1_fixed.lance/_transactions/99-bfc76eb2-b6c7-4f1a-82bb-e75da991679a.txn new file mode 100644 index 0000000000000000000000000000000000000000..aa961d013a34cf08d4b6e8d89703d59b8e260e2c Binary files /dev/null and b/.lancedb/model1_fixed.lance/_transactions/99-bfc76eb2-b6c7-4f1a-82bb-e75da991679a.txn differ diff --git a/.lancedb/model1_fixed.lance/data/0236f563-d18d-4f37-99e7-70818c416ac6.lance b/.lancedb/model1_fixed.lance/data/0236f563-d18d-4f37-99e7-70818c416ac6.lance new file mode 100644 index 0000000000000000000000000000000000000000..9a35c6265972de4e2beb557d803a6aa569af1566 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/0236f563-d18d-4f37-99e7-70818c416ac6.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea518c0af21c52d338a543f19fbd53c5398a388c9306822fca5baa98d02f2c0a +size 67008 diff --git a/.lancedb/model1_fixed.lance/data/023d07be-aca0-45f3-b5a3-6ab93efe774d.lance b/.lancedb/model1_fixed.lance/data/023d07be-aca0-45f3-b5a3-6ab93efe774d.lance new file mode 100644 index 0000000000000000000000000000000000000000..2aab7ec59520f2008bcbd1cb57c69a3e3215dd29 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/023d07be-aca0-45f3-b5a3-6ab93efe774d.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ea64bf5545f41b8e5c07a7ed0193ff9bd457cb2a052cc11847bfd44d84de7fd +size 66699 diff --git a/.lancedb/model1_fixed.lance/data/08628c90-269a-4b93-b7d2-0bda22c6e45e.lance b/.lancedb/model1_fixed.lance/data/08628c90-269a-4b93-b7d2-0bda22c6e45e.lance new file mode 100644 index 0000000000000000000000000000000000000000..7123859fa359ad6a7ee8ba6a3f7babcd4805ccea --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/08628c90-269a-4b93-b7d2-0bda22c6e45e.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44467de17a1fb5e53dbfc338e2f59969f3512a112a73193a52697a721a03f4b8 +size 70445 diff --git a/.lancedb/model1_fixed.lance/data/0aa332a5-d933-4b1e-86d7-fefcd2268b36.lance b/.lancedb/model1_fixed.lance/data/0aa332a5-d933-4b1e-86d7-fefcd2268b36.lance new file mode 100644 index 0000000000000000000000000000000000000000..f2aba0c8f28c957b01c57a79f33a4caa8ae7627f --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/0aa332a5-d933-4b1e-86d7-fefcd2268b36.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10aad4dc84c3ab418969af972fc63a84a8d3726f2d45f0390bac3ef673800d13 +size 66786 diff --git a/.lancedb/model1_fixed.lance/data/10ea440e-c02e-4040-9cc5-fed8374efa5a.lance b/.lancedb/model1_fixed.lance/data/10ea440e-c02e-4040-9cc5-fed8374efa5a.lance new file mode 100644 index 0000000000000000000000000000000000000000..e83acb795ce21a8ddad1df1df2ea8270d9b2f8b4 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/10ea440e-c02e-4040-9cc5-fed8374efa5a.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c95ec42f313600bb5d29d3aa253db543e5f1e5cad014f8ab1b20b33c20ea5f9 +size 73283 diff --git a/.lancedb/model1_fixed.lance/data/120a635e-c8d4-4b20-88ac-295387d74662.lance b/.lancedb/model1_fixed.lance/data/120a635e-c8d4-4b20-88ac-295387d74662.lance new file mode 100644 index 0000000000000000000000000000000000000000..f2ae32638cd73677deb6c5b19ca4125a21c84019 --- /dev/null +++ 
b/.lancedb/model1_fixed.lance/data/120a635e-c8d4-4b20-88ac-295387d74662.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8132798ca4b1c9b2766f093139b709c27cf75da164b9705b7b83bdd3cb7bf2f1 +size 71061 diff --git a/.lancedb/model1_fixed.lance/data/143a63e8-a472-49d7-ab9c-258ff82f8dbf.lance b/.lancedb/model1_fixed.lance/data/143a63e8-a472-49d7-ab9c-258ff82f8dbf.lance new file mode 100644 index 0000000000000000000000000000000000000000..09a96685da51c6bb6b0dff96d65bf9ece37ffedf --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/143a63e8-a472-49d7-ab9c-258ff82f8dbf.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b4c54af071de51de8a14926240fd46529ac2157b24613480943a8894faa88f6 +size 64599 diff --git a/.lancedb/model1_fixed.lance/data/176fedd7-96f4-4f61-b153-d611d681313c.lance b/.lancedb/model1_fixed.lance/data/176fedd7-96f4-4f61-b153-d611d681313c.lance new file mode 100644 index 0000000000000000000000000000000000000000..12c9c7481d417ad25cde4cc5fe2b8fa51ab4ce04 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/176fedd7-96f4-4f61-b153-d611d681313c.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b761710ead08fa40e95bb73253cace0a7c6a986dbc3ba39fd831d6b12deb4952 +size 80634 diff --git a/.lancedb/model1_fixed.lance/data/1936098e-dc85-486c-842b-d736d37b785d.lance b/.lancedb/model1_fixed.lance/data/1936098e-dc85-486c-842b-d736d37b785d.lance new file mode 100644 index 0000000000000000000000000000000000000000..ad198693831a679225b4de944ed8bf9666ebf8b9 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/1936098e-dc85-486c-842b-d736d37b785d.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a49a0d19947f9e0c00acccd5adb8364fd3ca24a70fdf54d53a05d4a19dadcc73 +size 72854 diff --git a/.lancedb/model1_fixed.lance/data/207ab593-3007-4332-95cc-2622f4e7de16.lance b/.lancedb/model1_fixed.lance/data/207ab593-3007-4332-95cc-2622f4e7de16.lance new file mode 100644 index 0000000000000000000000000000000000000000..162ff9557e3c06c8abe752fc2a5e3bf3adc5ef6e --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/207ab593-3007-4332-95cc-2622f4e7de16.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c85be31b061bbbcb329530bc3015e931add7e4a13190592bbc05b5de01724cb +size 69416 diff --git a/.lancedb/model1_fixed.lance/data/233d8295-49f4-463a-9b87-f52ca6db0fa6.lance b/.lancedb/model1_fixed.lance/data/233d8295-49f4-463a-9b87-f52ca6db0fa6.lance new file mode 100644 index 0000000000000000000000000000000000000000..d52b75242fe0367ac29c41840fcfc793c1aa5d74 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/233d8295-49f4-463a-9b87-f52ca6db0fa6.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3329f1632de1d3a65922dc49720de15f6e86f338d66e44d10689201fe83d42b +size 75312 diff --git a/.lancedb/model1_fixed.lance/data/24eb51a6-9f19-43a4-bc66-aa5af34aba8b.lance b/.lancedb/model1_fixed.lance/data/24eb51a6-9f19-43a4-bc66-aa5af34aba8b.lance new file mode 100644 index 0000000000000000000000000000000000000000..e40e9a843ff3f1644c9a5d0ba54f2fb2be89fe18 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/24eb51a6-9f19-43a4-bc66-aa5af34aba8b.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b380796b033ceea1a20da63c0866d2a0ca3e370b65a28f5b06c65f1de602980c +size 66395 diff --git a/.lancedb/model1_fixed.lance/data/287c2895-7ca8-4a70-a321-d758c0a66429.lance b/.lancedb/model1_fixed.lance/data/287c2895-7ca8-4a70-a321-d758c0a66429.lance new file mode 100644 index 
0000000000000000000000000000000000000000..531d501cf8a5dd78e4857c7a64916a4e0e58c8f2 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/287c2895-7ca8-4a70-a321-d758c0a66429.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc813885323761e4cb2ee2fec122c420e99d77dda6f9bf94a1b36e425335b0a3 +size 70764 diff --git a/.lancedb/model1_fixed.lance/data/2922cf95-932d-4819-9080-66e8a80f1ec2.lance b/.lancedb/model1_fixed.lance/data/2922cf95-932d-4819-9080-66e8a80f1ec2.lance new file mode 100644 index 0000000000000000000000000000000000000000..b54559690b3052b5f307f516226174e864ccf1f1 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/2922cf95-932d-4819-9080-66e8a80f1ec2.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6f9c67ea4a548b9570c55f9a9e2e8386ec6a05d4f2a978a661c8a3f82759a27 +size 69983 diff --git a/.lancedb/model1_fixed.lance/data/2951d56b-9f68-43ac-a555-7341d5bc8a7b.lance b/.lancedb/model1_fixed.lance/data/2951d56b-9f68-43ac-a555-7341d5bc8a7b.lance new file mode 100644 index 0000000000000000000000000000000000000000..134d4de38e66e8ec63480cb87b695542e06ad9f7 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/2951d56b-9f68-43ac-a555-7341d5bc8a7b.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a2eebe41ce14b25818d5a1e7633a5554bdef95ae19580fe148ec62f31cb5079 +size 67102 diff --git a/.lancedb/model1_fixed.lance/data/2ab0a661-7092-4fa6-b40d-dcca2e3dbc62.lance b/.lancedb/model1_fixed.lance/data/2ab0a661-7092-4fa6-b40d-dcca2e3dbc62.lance new file mode 100644 index 0000000000000000000000000000000000000000..913cdfae868625e5ef2da69518426de2525e5053 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/2ab0a661-7092-4fa6-b40d-dcca2e3dbc62.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b23dad2cc3dd443d05daf841a55aa114068c3c42d59196f769c9fb122e4c6239 +size 72162 diff --git a/.lancedb/model1_fixed.lance/data/2b36abb4-2626-42fd-a289-68c0a2fda21b.lance b/.lancedb/model1_fixed.lance/data/2b36abb4-2626-42fd-a289-68c0a2fda21b.lance new file mode 100644 index 0000000000000000000000000000000000000000..1a0afc55a1c513755c501ae10d722dfaab457d55 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/2b36abb4-2626-42fd-a289-68c0a2fda21b.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55abf9b0845af6496d8439203294769dbd68ec515e8f663b9595815458ed39f7 +size 65274 diff --git a/.lancedb/model1_fixed.lance/data/2c3af81b-0064-40fe-be4a-ad70cc23d6ed.lance b/.lancedb/model1_fixed.lance/data/2c3af81b-0064-40fe-be4a-ad70cc23d6ed.lance new file mode 100644 index 0000000000000000000000000000000000000000..d7012ab0d11d305ed84acf7f6cff7d19f52a1c80 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/2c3af81b-0064-40fe-be4a-ad70cc23d6ed.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb051c67d6a8b21e2b094688fef6395c78f5363c6c4c53b4e599240425003d42 +size 68270 diff --git a/.lancedb/model1_fixed.lance/data/2e25d17b-39cb-4a94-b4bd-9ef9bea704e1.lance b/.lancedb/model1_fixed.lance/data/2e25d17b-39cb-4a94-b4bd-9ef9bea704e1.lance new file mode 100644 index 0000000000000000000000000000000000000000..f0cebb439c3ec38ac8c7e266134dea3225583b06 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/2e25d17b-39cb-4a94-b4bd-9ef9bea704e1.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48dd3f45c85751c0e05a50297e9dbf838a65ba35c1e131b5694e65932e514e36 +size 68970 diff --git a/.lancedb/model1_fixed.lance/data/32b4e656-bdcc-4925-a3b5-6fb31420dd71.lance 
b/.lancedb/model1_fixed.lance/data/32b4e656-bdcc-4925-a3b5-6fb31420dd71.lance new file mode 100644 index 0000000000000000000000000000000000000000..19f9816b696fd0acda2bfba137a453258787a503 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/32b4e656-bdcc-4925-a3b5-6fb31420dd71.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4cf2965ad2f6c2a7eb86e2e06b5609a2860c3af4a360d1cfb07981e7218beaf +size 69806 diff --git a/.lancedb/model1_fixed.lance/data/33ec7545-1c26-48cb-8c94-a4d627342d66.lance b/.lancedb/model1_fixed.lance/data/33ec7545-1c26-48cb-8c94-a4d627342d66.lance new file mode 100644 index 0000000000000000000000000000000000000000..959876423ad2c2c2f557a4671c9713fe4b164bde --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/33ec7545-1c26-48cb-8c94-a4d627342d66.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3af230d12e8a91008cbaa1a417601b2bf959e0eb5265618188d6f21448c328b9 +size 70836 diff --git a/.lancedb/model1_fixed.lance/data/34a25056-f54a-4d88-9ac1-f13836e89117.lance b/.lancedb/model1_fixed.lance/data/34a25056-f54a-4d88-9ac1-f13836e89117.lance new file mode 100644 index 0000000000000000000000000000000000000000..e9b468a5a14c16d9a484a78fa2d24360ca01f304 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/34a25056-f54a-4d88-9ac1-f13836e89117.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52f2a4ba64cc49164a0c34fd006ddd0410711d2dc18ba887587dec37f715207d +size 70476 diff --git a/.lancedb/model1_fixed.lance/data/3984cfe8-f50f-4f88-9bf3-52f594d748e6.lance b/.lancedb/model1_fixed.lance/data/3984cfe8-f50f-4f88-9bf3-52f594d748e6.lance new file mode 100644 index 0000000000000000000000000000000000000000..ffc09a5717923ba92283440bc4cce04bd215db13 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/3984cfe8-f50f-4f88-9bf3-52f594d748e6.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de8ea8a5854751fc738906a98ad7be7619d8346659df848f156d2f229b1d8531 +size 71637 diff --git a/.lancedb/model1_fixed.lance/data/3a370707-ce6d-4057-b447-64720840fad4.lance b/.lancedb/model1_fixed.lance/data/3a370707-ce6d-4057-b447-64720840fad4.lance new file mode 100644 index 0000000000000000000000000000000000000000..aa2cbb8acd2aca02b3240426c423a7c7de8d40fc --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/3a370707-ce6d-4057-b447-64720840fad4.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6430044457bc03948cb82b2b0942f032a18db6e17662cd3aff75f2e813548e47 +size 64961 diff --git a/.lancedb/model1_fixed.lance/data/3c2ea04e-c53b-41ac-a2fc-26556eb25a30.lance b/.lancedb/model1_fixed.lance/data/3c2ea04e-c53b-41ac-a2fc-26556eb25a30.lance new file mode 100644 index 0000000000000000000000000000000000000000..50924fa2bbb4f186e85ea78b626f67850995cf16 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/3c2ea04e-c53b-41ac-a2fc-26556eb25a30.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b6c867084d6de8cd9f443cfc50071216c7b517e7998570e79a6c12f0133f6b3 +size 69069 diff --git a/.lancedb/model1_fixed.lance/data/3cd7e631-9fe6-4bee-bf7a-c3f85f588ecc.lance b/.lancedb/model1_fixed.lance/data/3cd7e631-9fe6-4bee-bf7a-c3f85f588ecc.lance new file mode 100644 index 0000000000000000000000000000000000000000..aa22d0428fdb172c96071ce2b7a3a0eba4cf4a6a --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/3cd7e631-9fe6-4bee-bf7a-c3f85f588ecc.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:864f19168942ab308d3c3f0a52a688eb401186b61b647b353e76ecd7037d5a50 
+size 63982 diff --git a/.lancedb/model1_fixed.lance/data/4052c149-286f-426b-87ee-d65399484fc7.lance b/.lancedb/model1_fixed.lance/data/4052c149-286f-426b-87ee-d65399484fc7.lance new file mode 100644 index 0000000000000000000000000000000000000000..2a5d08029dde36ac47a8f1adcb399c26c262cc66 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/4052c149-286f-426b-87ee-d65399484fc7.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d13f12bac2aba0dadd3a017c7d659e458106cdee26bbce396c6851f99ce2a2af +size 65433 diff --git a/.lancedb/model1_fixed.lance/data/42d1d9af-51bf-437e-9bf2-34f77a7efd97.lance b/.lancedb/model1_fixed.lance/data/42d1d9af-51bf-437e-9bf2-34f77a7efd97.lance new file mode 100644 index 0000000000000000000000000000000000000000..d3e7ce770ec0b2a4c5e3678c576b55a38e4f8f4f --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/42d1d9af-51bf-437e-9bf2-34f77a7efd97.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53ee90a0aa246d91efd1981ef59ee1d1fc76042f4c339f6dbf6a8d016328006e +size 69001 diff --git a/.lancedb/model1_fixed.lance/data/4502aa2e-7969-4eec-ab55-4ffba0fe5246.lance b/.lancedb/model1_fixed.lance/data/4502aa2e-7969-4eec-ab55-4ffba0fe5246.lance new file mode 100644 index 0000000000000000000000000000000000000000..4bb491af5beed63e6c0ff6fcba2be3630b3737de --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/4502aa2e-7969-4eec-ab55-4ffba0fe5246.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b22e52994db921aa29e54f9b3269957fa37267555d11977aeabe28c444bcc557 +size 65882 diff --git a/.lancedb/model1_fixed.lance/data/461b541d-cc8d-4e4d-9ae6-058ff8a65bb5.lance b/.lancedb/model1_fixed.lance/data/461b541d-cc8d-4e4d-9ae6-058ff8a65bb5.lance new file mode 100644 index 0000000000000000000000000000000000000000..492364164f70833c96df9ceb29b4742eee1df7b7 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/461b541d-cc8d-4e4d-9ae6-058ff8a65bb5.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3a72a21f06d9fb98d3768d4570104db885ef0c51e3b6aecef12e30e6b6c1762 +size 73413 diff --git a/.lancedb/model1_fixed.lance/data/46557cb7-fa7c-4f56-82fa-169e929bed1f.lance b/.lancedb/model1_fixed.lance/data/46557cb7-fa7c-4f56-82fa-169e929bed1f.lance new file mode 100644 index 0000000000000000000000000000000000000000..f0d1674d4ffce4702f9302063b43a46bb324627b --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/46557cb7-fa7c-4f56-82fa-169e929bed1f.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ae7b10cccb2320862fbe5e2d56d726f4769cd490c9283a11b944da3f5289bc7 +size 73897 diff --git a/.lancedb/model1_fixed.lance/data/4718b33f-a079-4b03-bced-9a6779f186f3.lance b/.lancedb/model1_fixed.lance/data/4718b33f-a079-4b03-bced-9a6779f186f3.lance new file mode 100644 index 0000000000000000000000000000000000000000..7b29353293ba2708497af40f15d015fc1bb2d7e2 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/4718b33f-a079-4b03-bced-9a6779f186f3.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c678c08c604b434c887a34c121acd39844430f2ff95c79ee17e00f72c5a91bc +size 69702 diff --git a/.lancedb/model1_fixed.lance/data/4a777bfd-a405-41a1-85bd-a4bac513ec77.lance b/.lancedb/model1_fixed.lance/data/4a777bfd-a405-41a1-85bd-a4bac513ec77.lance new file mode 100644 index 0000000000000000000000000000000000000000..d9c1cae659695270c9a83645478be5ce21d721a6 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/4a777bfd-a405-41a1-85bd-a4bac513ec77.lance @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:2380c2d7cff37cb7e6fe22f2189a7fe89e0f2b36697ef35602b8dee82d988518 +size 68401 diff --git a/.lancedb/model1_fixed.lance/data/4bc456af-451e-4e87-af13-9e5f9ea6f3f7.lance b/.lancedb/model1_fixed.lance/data/4bc456af-451e-4e87-af13-9e5f9ea6f3f7.lance new file mode 100644 index 0000000000000000000000000000000000000000..f0eef05e144c7a58c1b151bb19dd83bfafeaf76d --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/4bc456af-451e-4e87-af13-9e5f9ea6f3f7.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:106aecc306ed224c6c152028c400d653a5e08608ac3a78bfce751202c41f73a8 +size 79068 diff --git a/.lancedb/model1_fixed.lance/data/4e9ff796-b0f6-4cf1-98c0-8e0a457c4937.lance b/.lancedb/model1_fixed.lance/data/4e9ff796-b0f6-4cf1-98c0-8e0a457c4937.lance new file mode 100644 index 0000000000000000000000000000000000000000..0519b30eec8cf5e069b73874abe3f92409782a66 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/4e9ff796-b0f6-4cf1-98c0-8e0a457c4937.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a1b8a16cdf3ecb1f0c5d38552aad4609e752021259e3a9687c1b8d7244245ab +size 74854 diff --git a/.lancedb/model1_fixed.lance/data/50a3ed1a-88a0-4645-b38b-73d05661490b.lance b/.lancedb/model1_fixed.lance/data/50a3ed1a-88a0-4645-b38b-73d05661490b.lance new file mode 100644 index 0000000000000000000000000000000000000000..bb3c1186ea5dd34f5cf6aa1065b799b56fca3790 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/50a3ed1a-88a0-4645-b38b-73d05661490b.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4f97c94abcd15803e88288e82bc941a322040f141efac0b93b54e8503be714e +size 70547 diff --git a/.lancedb/model1_fixed.lance/data/50c89209-097f-4672-838e-a0f7ca44c2c7.lance b/.lancedb/model1_fixed.lance/data/50c89209-097f-4672-838e-a0f7ca44c2c7.lance new file mode 100644 index 0000000000000000000000000000000000000000..29f651966386c986a67dda27d8519f4f60c81911 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/50c89209-097f-4672-838e-a0f7ca44c2c7.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4c88c53428752f970e9693f47b798567d6867fd05a4f5672f22f6092d38e758 +size 77737 diff --git a/.lancedb/model1_fixed.lance/data/50ce8f14-52c7-492e-93f6-a4e83e2b00eb.lance b/.lancedb/model1_fixed.lance/data/50ce8f14-52c7-492e-93f6-a4e83e2b00eb.lance new file mode 100644 index 0000000000000000000000000000000000000000..8507d393ab4c18414895c9833b7ee5fbfe3ff3de --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/50ce8f14-52c7-492e-93f6-a4e83e2b00eb.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a2be8760e2712a727b632db5df66bf49c980f97a6365803b88809498e9b2cef +size 68035 diff --git a/.lancedb/model1_fixed.lance/data/5a93ffaa-e9ad-421a-8dc6-bb0ed9fc7775.lance b/.lancedb/model1_fixed.lance/data/5a93ffaa-e9ad-421a-8dc6-bb0ed9fc7775.lance new file mode 100644 index 0000000000000000000000000000000000000000..a8a4755c34b8568eae9e4d3de8c1f22c41147ed1 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/5a93ffaa-e9ad-421a-8dc6-bb0ed9fc7775.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cfbea7d74c7634cf2731b70a05a27cf3f1084d5da4552180ed148ddb0cc25bf +size 74066 diff --git a/.lancedb/model1_fixed.lance/data/5d464c59-d1e0-460e-bcaa-2b71ae24ea13.lance b/.lancedb/model1_fixed.lance/data/5d464c59-d1e0-460e-bcaa-2b71ae24ea13.lance new file mode 100644 index 0000000000000000000000000000000000000000..ca72d8b823da23c7c3c679977b0a03dda4d3dfee --- /dev/null +++ 
b/.lancedb/model1_fixed.lance/data/5d464c59-d1e0-460e-bcaa-2b71ae24ea13.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49ae2084ea9ca7e69dcc5b22da14e4d71d12fe4bf8f88e3eb2a1e2a3168ed232 +size 79800 diff --git a/.lancedb/model1_fixed.lance/data/5d62ce2b-8ff3-4b53-a465-25f4d5624277.lance b/.lancedb/model1_fixed.lance/data/5d62ce2b-8ff3-4b53-a465-25f4d5624277.lance new file mode 100644 index 0000000000000000000000000000000000000000..32e886c854c66967263b6bef54fd9a7539073aa6 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/5d62ce2b-8ff3-4b53-a465-25f4d5624277.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35486826e8cd2bb1b6a7baaa7772321e971265d64d76067ce414e218546942e4 +size 72547 diff --git a/.lancedb/model1_fixed.lance/data/5efd598d-f6d7-4cec-9e87-d2e781e10c8f.lance b/.lancedb/model1_fixed.lance/data/5efd598d-f6d7-4cec-9e87-d2e781e10c8f.lance new file mode 100644 index 0000000000000000000000000000000000000000..1ca58f80740775e95b0a94c01220954cca5a12ee --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/5efd598d-f6d7-4cec-9e87-d2e781e10c8f.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:622cd836faed637e9fb7227a88ad1804272a6fbcf05fd1c85835f9d0fc40d805 +size 71493 diff --git a/.lancedb/model1_fixed.lance/data/60f4a9a9-795d-4fac-9639-756c0d809502.lance b/.lancedb/model1_fixed.lance/data/60f4a9a9-795d-4fac-9639-756c0d809502.lance new file mode 100644 index 0000000000000000000000000000000000000000..66a1d5639480c07d52149ad419e15351c340a7df --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/60f4a9a9-795d-4fac-9639-756c0d809502.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cce55aeb0debb040f76ef8cf33f58a937b08a3fc6836589300e3c82ee65255ec +size 67717 diff --git a/.lancedb/model1_fixed.lance/data/619cb327-01b7-4bef-9010-3b07f3e55070.lance b/.lancedb/model1_fixed.lance/data/619cb327-01b7-4bef-9010-3b07f3e55070.lance new file mode 100644 index 0000000000000000000000000000000000000000..4405713f2cd6e8aac830ccf4c41ed3c6479cb3d3 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/619cb327-01b7-4bef-9010-3b07f3e55070.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efabacf394b67b3e2755f24f448b5472b3858156d2a1e2a818c04f824a275d69 +size 75657 diff --git a/.lancedb/model1_fixed.lance/data/649e391e-3f50-4260-b139-05cb7f9f5c2c.lance b/.lancedb/model1_fixed.lance/data/649e391e-3f50-4260-b139-05cb7f9f5c2c.lance new file mode 100644 index 0000000000000000000000000000000000000000..36362e9854f6eae1ff5b30b4a47f99dc7105f6fb --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/649e391e-3f50-4260-b139-05cb7f9f5c2c.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12f1b18924e72b910a5b3b42dea6ca913d8a10b20f888d30cc59dd8eea7bcdca +size 66961 diff --git a/.lancedb/model1_fixed.lance/data/67204198-39e5-4a38-b955-6bc059b1dc3e.lance b/.lancedb/model1_fixed.lance/data/67204198-39e5-4a38-b955-6bc059b1dc3e.lance new file mode 100644 index 0000000000000000000000000000000000000000..6bac4e63bad23d486b227aae5ef6e774c2e02f7f --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/67204198-39e5-4a38-b955-6bc059b1dc3e.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c71573ede83ac577f85c6cc18c5d0882c6984a9ccb1d435e5b5526cc66bae47d +size 75255 diff --git a/.lancedb/model1_fixed.lance/data/677919af-5e0f-43f8-b961-5ff54dc8693a.lance b/.lancedb/model1_fixed.lance/data/677919af-5e0f-43f8-b961-5ff54dc8693a.lance new file mode 100644 index 
0000000000000000000000000000000000000000..3fd43788e7c91a701f36cfddbd8b877eba0bddf8 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/677919af-5e0f-43f8-b961-5ff54dc8693a.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80e9d9c9d321261127d025ea2df86f64606293dc8de8f17529f34a74200079e3 +size 70290 diff --git a/.lancedb/model1_fixed.lance/data/6ab209be-5ced-43b0-87a7-0191b90d2371.lance b/.lancedb/model1_fixed.lance/data/6ab209be-5ced-43b0-87a7-0191b90d2371.lance new file mode 100644 index 0000000000000000000000000000000000000000..abe9e182b7c4842a3998f515915afb373daf6520 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/6ab209be-5ced-43b0-87a7-0191b90d2371.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c6e0dc68ef12c9857690a672255240a7a7317b3bf450a00656ad002da9427ea +size 83983 diff --git a/.lancedb/model1_fixed.lance/data/6b3ae8f2-4334-41b5-93e2-b8ff85b1c27b.lance b/.lancedb/model1_fixed.lance/data/6b3ae8f2-4334-41b5-93e2-b8ff85b1c27b.lance new file mode 100644 index 0000000000000000000000000000000000000000..4856f5b4c44c27c2ef30bc4d3bf67ae6c60d310d --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/6b3ae8f2-4334-41b5-93e2-b8ff85b1c27b.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf395c888c60818dd0e29baedd7d63dcf56ad327321e6188bad43ec53fa139b3 +size 72092 diff --git a/.lancedb/model1_fixed.lance/data/6f30dd78-2c78-4281-8f87-4dd7e294d548.lance b/.lancedb/model1_fixed.lance/data/6f30dd78-2c78-4281-8f87-4dd7e294d548.lance new file mode 100644 index 0000000000000000000000000000000000000000..45042ed8f7aa65635d2dd5c5e75246afe874acae --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/6f30dd78-2c78-4281-8f87-4dd7e294d548.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab5567b8f8a45271241afbf93e1e31e425c8a52ecaa72f18e6176faf847701c4 +size 76108 diff --git a/.lancedb/model1_fixed.lance/data/71a26789-a082-4562-9244-7715aede643d.lance b/.lancedb/model1_fixed.lance/data/71a26789-a082-4562-9244-7715aede643d.lance new file mode 100644 index 0000000000000000000000000000000000000000..263f11fc51cd501a9e7098f79c16a0098e178161 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/71a26789-a082-4562-9244-7715aede643d.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ff7c62a827aa75a64f88bcecdfe291a82e11b6c2fc423e011e2da0f4c48fcea +size 73077 diff --git a/.lancedb/model1_fixed.lance/data/71cb81cc-73c2-4dca-aff1-5830ac441d80.lance b/.lancedb/model1_fixed.lance/data/71cb81cc-73c2-4dca-aff1-5830ac441d80.lance new file mode 100644 index 0000000000000000000000000000000000000000..a30cb8f86a4c9e69295a2499347e50dccc3a2a5c --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/71cb81cc-73c2-4dca-aff1-5830ac441d80.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a060e172a85f8b497221d9c74aa66424045b9f988da22a60c2c4e8130b193213 +size 70526 diff --git a/.lancedb/model1_fixed.lance/data/732b8bb1-f2fc-42b4-b3d1-3a830cc672bc.lance b/.lancedb/model1_fixed.lance/data/732b8bb1-f2fc-42b4-b3d1-3a830cc672bc.lance new file mode 100644 index 0000000000000000000000000000000000000000..59b2cca3cb07735939fde143ea5808a4ebd16a10 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/732b8bb1-f2fc-42b4-b3d1-3a830cc672bc.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc4e65ff4dbf5404db807310f76007bb28274352733b76b703c951a1ccdb01f1 +size 73635 diff --git a/.lancedb/model1_fixed.lance/data/793c9b4f-803a-4da7-8389-f6dd29282a1f.lance 
b/.lancedb/model1_fixed.lance/data/793c9b4f-803a-4da7-8389-f6dd29282a1f.lance new file mode 100644 index 0000000000000000000000000000000000000000..f9df5fecc8d137c28ea07d6da29bd4bcc0ba8b84 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/793c9b4f-803a-4da7-8389-f6dd29282a1f.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65306910d23a32d197843f32b94b33d13e5914cbe21ba09a079de820d9af8aec +size 76183 diff --git a/.lancedb/model1_fixed.lance/data/7eb5754c-b13d-49fc-9b44-7a1e23bdf273.lance b/.lancedb/model1_fixed.lance/data/7eb5754c-b13d-49fc-9b44-7a1e23bdf273.lance new file mode 100644 index 0000000000000000000000000000000000000000..1623f82ad88c95a602435895bb99bbcf1afd994b --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/7eb5754c-b13d-49fc-9b44-7a1e23bdf273.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc82192c4be50c838534fa68ab8dde35b03105b21e5d5b27a7e0be07193fc1d2 +size 75067 diff --git a/.lancedb/model1_fixed.lance/data/7f29b1b7-264e-4ade-86cd-b6258a004592.lance b/.lancedb/model1_fixed.lance/data/7f29b1b7-264e-4ade-86cd-b6258a004592.lance new file mode 100644 index 0000000000000000000000000000000000000000..abf65791fa7d95de675d8e66c6f7201b10562979 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/7f29b1b7-264e-4ade-86cd-b6258a004592.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68d332b7ac05895f2401ebe472645a8369565f505e93c104ec39805510548ae5 +size 73895 diff --git a/.lancedb/model1_fixed.lance/data/802a9bf2-8cbb-4a94-9a4f-8d1a1f6a1c63.lance b/.lancedb/model1_fixed.lance/data/802a9bf2-8cbb-4a94-9a4f-8d1a1f6a1c63.lance new file mode 100644 index 0000000000000000000000000000000000000000..eb13ee72d7b0c30a1d2d30196541914deeb202d7 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/802a9bf2-8cbb-4a94-9a4f-8d1a1f6a1c63.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3dcde3b96eee29bc60fccf6df51ff97bfec394f272d94e72031d8954dacff02 +size 71535 diff --git a/.lancedb/model1_fixed.lance/data/80a302b4-22aa-494c-97eb-3ae75f571ee7.lance b/.lancedb/model1_fixed.lance/data/80a302b4-22aa-494c-97eb-3ae75f571ee7.lance new file mode 100644 index 0000000000000000000000000000000000000000..38371d58477bdcd3697113758663d8a9bc181c69 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/80a302b4-22aa-494c-97eb-3ae75f571ee7.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52afb1de4686e99e1ef9a2d83f147ad5cea0e5b53c38e98214fa3c27424875f5 +size 75369 diff --git a/.lancedb/model1_fixed.lance/data/83b3717e-1ba4-4968-9aa4-d657304432c6.lance b/.lancedb/model1_fixed.lance/data/83b3717e-1ba4-4968-9aa4-d657304432c6.lance new file mode 100644 index 0000000000000000000000000000000000000000..6737bc7bbf442a81ec09c84569e8d86b81b42ca9 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/83b3717e-1ba4-4968-9aa4-d657304432c6.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb688e1c33094c6c1608524520a68c8da89710ac65277d72d673893664a869d4 +size 69441 diff --git a/.lancedb/model1_fixed.lance/data/843372d8-d5ce-4289-96d6-c8e8501ab567.lance b/.lancedb/model1_fixed.lance/data/843372d8-d5ce-4289-96d6-c8e8501ab567.lance new file mode 100644 index 0000000000000000000000000000000000000000..9e1f0b29167301c90acce43e69b8737dda11a91b --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/843372d8-d5ce-4289-96d6-c8e8501ab567.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:961b739b93338f67d742ed5ed4f6e9b28a9d9221aafd64c9dc1ec1a70041d0f0 
+size 69992 diff --git a/.lancedb/model1_fixed.lance/data/8849f964-a048-4605-9d14-d3fc94ec9fb7.lance b/.lancedb/model1_fixed.lance/data/8849f964-a048-4605-9d14-d3fc94ec9fb7.lance new file mode 100644 index 0000000000000000000000000000000000000000..6eaefafcd4a8a245a3d2958e757749d42c485e2d --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/8849f964-a048-4605-9d14-d3fc94ec9fb7.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d35be1e5c8927d8515fb0bbea68662c7a870752947a2b8f32da90a254f3522e5 +size 68984 diff --git a/.lancedb/model1_fixed.lance/data/889f59dd-7d72-49f5-8314-693aa0e8d220.lance b/.lancedb/model1_fixed.lance/data/889f59dd-7d72-49f5-8314-693aa0e8d220.lance new file mode 100644 index 0000000000000000000000000000000000000000..bb76aa37f2649b59698e97bafb91ea26afd338b9 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/889f59dd-7d72-49f5-8314-693aa0e8d220.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2d096ca34701a73bdaaed30533b3adf57f6d8025cd61fc19223eb03698f6e47 +size 71592 diff --git a/.lancedb/model1_fixed.lance/data/8ecfe69f-241f-4d04-920c-b9745febace4.lance b/.lancedb/model1_fixed.lance/data/8ecfe69f-241f-4d04-920c-b9745febace4.lance new file mode 100644 index 0000000000000000000000000000000000000000..090e7a923634c16e96fc6748c34fedefb5f495f6 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/8ecfe69f-241f-4d04-920c-b9745febace4.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb748fae9a23156890a32e9163a09856d6d3448269f3a78d90c6de54383ba951 +size 72117 diff --git a/.lancedb/model1_fixed.lance/data/9010a388-d95f-4407-96e6-10fb14bfc9a9.lance b/.lancedb/model1_fixed.lance/data/9010a388-d95f-4407-96e6-10fb14bfc9a9.lance new file mode 100644 index 0000000000000000000000000000000000000000..e30ee5690aff996a853d12ac135a9c4aa5d52d84 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/9010a388-d95f-4407-96e6-10fb14bfc9a9.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15dc98df88914a741fdb37c20e092247d16d606faeaf53c982aecfc8d9159287 +size 67110 diff --git a/.lancedb/model1_fixed.lance/data/91490928-6f3d-400a-8b7d-b3387c7d35e0.lance b/.lancedb/model1_fixed.lance/data/91490928-6f3d-400a-8b7d-b3387c7d35e0.lance new file mode 100644 index 0000000000000000000000000000000000000000..36460c8188d7f2217fc231241ff1b33b693a5f65 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/91490928-6f3d-400a-8b7d-b3387c7d35e0.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6cc4589811a000930b44af96a0ec5b275ef8545a521fbe8b93936da56dd0c09 +size 68353 diff --git a/.lancedb/model1_fixed.lance/data/91add2eb-85c3-4ee7-8410-265f625e022d.lance b/.lancedb/model1_fixed.lance/data/91add2eb-85c3-4ee7-8410-265f625e022d.lance new file mode 100644 index 0000000000000000000000000000000000000000..b338b3e1438f603a23629ad1e82e60e307f775a0 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/91add2eb-85c3-4ee7-8410-265f625e022d.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:178b5e47378893f047347a54d353109192df57d39f445b79a027c4a1025c91d6 +size 73498 diff --git a/.lancedb/model1_fixed.lance/data/9a15c96d-226d-44db-b1b1-7e75026917e7.lance b/.lancedb/model1_fixed.lance/data/9a15c96d-226d-44db-b1b1-7e75026917e7.lance new file mode 100644 index 0000000000000000000000000000000000000000..cfefee94030a4f29aeefbb6a97eaa52eed7794ce --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/9a15c96d-226d-44db-b1b1-7e75026917e7.lance @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:5073485f505aac6f90d117405430330986fe1d075fb89e1d806a63f6fe986b45 +size 68694 diff --git a/.lancedb/model1_fixed.lance/data/9a6a4c70-a1b9-4ba9-8cb3-62b23ffe9d33.lance b/.lancedb/model1_fixed.lance/data/9a6a4c70-a1b9-4ba9-8cb3-62b23ffe9d33.lance new file mode 100644 index 0000000000000000000000000000000000000000..98bd30065d48aa2a778df3162f9a478a71a81f05 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/9a6a4c70-a1b9-4ba9-8cb3-62b23ffe9d33.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2625cfe67edd6de9c8150a5df0558d5548d70649a5956c0f73918e46abb031e8 +size 69892 diff --git a/.lancedb/model1_fixed.lance/data/9b0a8b73-29b9-4e65-ad33-079ce73bc29e.lance b/.lancedb/model1_fixed.lance/data/9b0a8b73-29b9-4e65-ad33-079ce73bc29e.lance new file mode 100644 index 0000000000000000000000000000000000000000..053ec272c45168c573d1d027c2806fe0b6ecc9e0 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/9b0a8b73-29b9-4e65-ad33-079ce73bc29e.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17200d645987c03600e3c660296f4ec502ae8b4e30b42a7a2e0f4bcba5f865b6 +size 68204 diff --git a/.lancedb/model1_fixed.lance/data/9ce75d02-c33c-4731-a90d-fb6e07ffe56f.lance b/.lancedb/model1_fixed.lance/data/9ce75d02-c33c-4731-a90d-fb6e07ffe56f.lance new file mode 100644 index 0000000000000000000000000000000000000000..821d4e6e6b65a4ed45828f9ef4e83bbee0dde654 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/9ce75d02-c33c-4731-a90d-fb6e07ffe56f.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9e6f6415a599adea32ea7d31c4077dc3956659f0cb7b2acc5b2ceccafcde07b +size 67993 diff --git a/.lancedb/model1_fixed.lance/data/a0408e63-6075-4e78-847c-05a5c76c19d0.lance b/.lancedb/model1_fixed.lance/data/a0408e63-6075-4e78-847c-05a5c76c19d0.lance new file mode 100644 index 0000000000000000000000000000000000000000..55e85ce1a770f5e2b24436f156047d1f0e2eb76e --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/a0408e63-6075-4e78-847c-05a5c76c19d0.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1496690757265fe20c7a95074c5214fec59c200fda1b86c2edb1dd34dea5cfc +size 74998 diff --git a/.lancedb/model1_fixed.lance/data/a1171de3-c776-4d74-b10c-a4161a9c3781.lance b/.lancedb/model1_fixed.lance/data/a1171de3-c776-4d74-b10c-a4161a9c3781.lance new file mode 100644 index 0000000000000000000000000000000000000000..7f6983ba75960047b39766da743528105673d9ad --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/a1171de3-c776-4d74-b10c-a4161a9c3781.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bf4be7505c46ce83d37761fa58b5abd8e06038b5bc27897a67c21d12202e009 +size 67805 diff --git a/.lancedb/model1_fixed.lance/data/a1ad99c8-41bc-42d9-b65b-9f210b91a8d5.lance b/.lancedb/model1_fixed.lance/data/a1ad99c8-41bc-42d9-b65b-9f210b91a8d5.lance new file mode 100644 index 0000000000000000000000000000000000000000..0ae529150531f03111c35edcfcf0297e4b107898 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/a1ad99c8-41bc-42d9-b65b-9f210b91a8d5.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e438602e0240aa4f8f47161d2ebf1fc05dfee44b29dab4303a8fbaf478895d23 +size 17631 diff --git a/.lancedb/model1_fixed.lance/data/a6b1f8b5-fbbf-41be-bbb4-bcc8c7081000.lance b/.lancedb/model1_fixed.lance/data/a6b1f8b5-fbbf-41be-bbb4-bcc8c7081000.lance new file mode 100644 index 0000000000000000000000000000000000000000..b9899b1312af8c9a19b299efa90e32695743a1c0 --- /dev/null +++ 
b/.lancedb/model1_fixed.lance/data/a6b1f8b5-fbbf-41be-bbb4-bcc8c7081000.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3969cdfb9e3a159e9d55eefef881753944fd4f31d7ade4254259d438ba5bf2a1 +size 67099 diff --git a/.lancedb/model1_fixed.lance/data/a6fb4d2e-d476-4cc2-825b-be8f78489b82.lance b/.lancedb/model1_fixed.lance/data/a6fb4d2e-d476-4cc2-825b-be8f78489b82.lance new file mode 100644 index 0000000000000000000000000000000000000000..15e05dd32b15d5c0f7283b13b1c22d95ecc535e1 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/a6fb4d2e-d476-4cc2-825b-be8f78489b82.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:996a19efdefc115311bffad28f1f758a4d88113c6863c0f44fb39edb0c7b584d +size 70719 diff --git a/.lancedb/model1_fixed.lance/data/a8508f12-bc8d-4303-b6f7-c017db432b4a.lance b/.lancedb/model1_fixed.lance/data/a8508f12-bc8d-4303-b6f7-c017db432b4a.lance new file mode 100644 index 0000000000000000000000000000000000000000..0f48d4bc7c9eba48086b0293ddd8911bc9215e71 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/a8508f12-bc8d-4303-b6f7-c017db432b4a.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8324099909dc6480c81388114d7f42415dd91b67b45a110d349dce03ace9f6b +size 69380 diff --git a/.lancedb/model1_fixed.lance/data/aa446c9d-7b26-46b8-bde4-028110005708.lance b/.lancedb/model1_fixed.lance/data/aa446c9d-7b26-46b8-bde4-028110005708.lance new file mode 100644 index 0000000000000000000000000000000000000000..39b3f02abfdcc991db421d10c0d24d6d01d3dabc --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/aa446c9d-7b26-46b8-bde4-028110005708.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30920402e5f5fcff6fa17b924ce4712f542bb4bf3c95672b56a2894ea5ec9adb +size 68911 diff --git a/.lancedb/model1_fixed.lance/data/aa692b9f-3c3a-471e-b35f-537c6413479b.lance b/.lancedb/model1_fixed.lance/data/aa692b9f-3c3a-471e-b35f-537c6413479b.lance new file mode 100644 index 0000000000000000000000000000000000000000..49ea11872491c9baf9b2828b32a9590bcb9ce17b --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/aa692b9f-3c3a-471e-b35f-537c6413479b.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78c388b8e649755d5e863f7de57fe723c76d0dbbc24ad2fde5861d303d9ab4ca +size 65483 diff --git a/.lancedb/model1_fixed.lance/data/ac65019b-5f59-4065-a807-ee5a7b5b9482.lance b/.lancedb/model1_fixed.lance/data/ac65019b-5f59-4065-a807-ee5a7b5b9482.lance new file mode 100644 index 0000000000000000000000000000000000000000..3e8721564560927446bd6ed605c147443289b2f2 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/ac65019b-5f59-4065-a807-ee5a7b5b9482.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc09ab64e3a280ac07e4708f23e8c9d87218decc21f21dbeacbfd59d6f9323cb +size 71281 diff --git a/.lancedb/model1_fixed.lance/data/afab1856-a576-4659-b911-e4542e60d171.lance b/.lancedb/model1_fixed.lance/data/afab1856-a576-4659-b911-e4542e60d171.lance new file mode 100644 index 0000000000000000000000000000000000000000..71637432920d5057f62bdcd90d5fbed1706bfa67 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/afab1856-a576-4659-b911-e4542e60d171.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5670bf8ec91bd1b1f23d6d9687993dc8e4f430d170af8526e88f74eff35ef6d +size 68000 diff --git a/.lancedb/model1_fixed.lance/data/b04f96d1-2b9b-4b5e-b71b-d32f4768f7b3.lance b/.lancedb/model1_fixed.lance/data/b04f96d1-2b9b-4b5e-b71b-d32f4768f7b3.lance new file mode 100644 index 
0000000000000000000000000000000000000000..2b87f662b72b5ddeace207cd26857fa47ee6f525 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/b04f96d1-2b9b-4b5e-b71b-d32f4768f7b3.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2821860ba111b35e952982cc841cbedb2fb6b9c083256ebb615250fcb542ceac +size 66090 diff --git a/.lancedb/model1_fixed.lance/data/b2cad6e2-1eb1-42e5-abeb-8eac09908e8e.lance b/.lancedb/model1_fixed.lance/data/b2cad6e2-1eb1-42e5-abeb-8eac09908e8e.lance new file mode 100644 index 0000000000000000000000000000000000000000..e55433c62e669796fd50d6764b329dbd1d95253e --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/b2cad6e2-1eb1-42e5-abeb-8eac09908e8e.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28da3572b6699c1f289eb78dc40855bcb1f498019d2caaba16f697de7a70abec +size 71086 diff --git a/.lancedb/model1_fixed.lance/data/b48e6035-f932-4304-a18c-872644202cb3.lance b/.lancedb/model1_fixed.lance/data/b48e6035-f932-4304-a18c-872644202cb3.lance new file mode 100644 index 0000000000000000000000000000000000000000..cd8610ec7fa00ce4db38efc58167eaac9a27d80f --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/b48e6035-f932-4304-a18c-872644202cb3.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:350963b5eb4561111294bb9cc81d997d4271f5db12f32b2cdd22a9155c3f2840 +size 74313 diff --git a/.lancedb/model1_fixed.lance/data/b4db39b7-3f79-402a-a305-902b1c742ce2.lance b/.lancedb/model1_fixed.lance/data/b4db39b7-3f79-402a-a305-902b1c742ce2.lance new file mode 100644 index 0000000000000000000000000000000000000000..e8922b651c70484e425bdadc8af71161cbb70490 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/b4db39b7-3f79-402a-a305-902b1c742ce2.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e01c4bed976a42cdf0e672ba2e760193437c6e9f839ddbf61ec3466f0c347e98 +size 70699 diff --git a/.lancedb/model1_fixed.lance/data/b5ab4462-3c2b-49bb-89ea-e178ef061fcf.lance b/.lancedb/model1_fixed.lance/data/b5ab4462-3c2b-49bb-89ea-e178ef061fcf.lance new file mode 100644 index 0000000000000000000000000000000000000000..2f414ed80db25c87570587a5a9c90e68e50b6420 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/b5ab4462-3c2b-49bb-89ea-e178ef061fcf.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c707c24859e6e5961372b02940c4bbfec6b34edd6814e242765a4b01fa112a1 +size 75644 diff --git a/.lancedb/model1_fixed.lance/data/b9956365-9b56-4568-96d0-7e9f878e0be8.lance b/.lancedb/model1_fixed.lance/data/b9956365-9b56-4568-96d0-7e9f878e0be8.lance new file mode 100644 index 0000000000000000000000000000000000000000..b414cdeed32b8bd0dd70c116c05a95e0824f4c5b --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/b9956365-9b56-4568-96d0-7e9f878e0be8.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aee53234317db238fe2c931c031757df405065eefc1347d85ce01b3863850e5f +size 65466 diff --git a/.lancedb/model1_fixed.lance/data/bc6003bb-bb71-461f-a78a-01b61b4dbd72.lance b/.lancedb/model1_fixed.lance/data/bc6003bb-bb71-461f-a78a-01b61b4dbd72.lance new file mode 100644 index 0000000000000000000000000000000000000000..43ff5678cd3f6d188b346c46fd85b7dbf60f9265 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/bc6003bb-bb71-461f-a78a-01b61b4dbd72.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee08ba90f847b07549fb07f25425c59c91b090d38484141f01a55805dd5a5fa1 +size 75408 diff --git a/.lancedb/model1_fixed.lance/data/bff672ed-f219-41cb-9564-4b99e756975f.lance 
b/.lancedb/model1_fixed.lance/data/bff672ed-f219-41cb-9564-4b99e756975f.lance new file mode 100644 index 0000000000000000000000000000000000000000..e8cc7caaab156f39de4670bc4225cf2743bdc165 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/bff672ed-f219-41cb-9564-4b99e756975f.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:289cc87120dbb64f82f5c3046b6f79af50df702690ee52d34a45c0e51f35ffef +size 67979 diff --git a/.lancedb/model1_fixed.lance/data/c3350a29-d0e1-41bb-acaa-5524aa363d69.lance b/.lancedb/model1_fixed.lance/data/c3350a29-d0e1-41bb-acaa-5524aa363d69.lance new file mode 100644 index 0000000000000000000000000000000000000000..567d7d63d9a26b5d67c99f0744f990a95cb46ab9 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/c3350a29-d0e1-41bb-acaa-5524aa363d69.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:315f2b0d4cdc03a5e224ee76c228a52942a49c0446bde6ceb032f59000a4f2e5 +size 71507 diff --git a/.lancedb/model1_fixed.lance/data/c34fe883-e62a-4a93-bd4f-1b6a7b854318.lance b/.lancedb/model1_fixed.lance/data/c34fe883-e62a-4a93-bd4f-1b6a7b854318.lance new file mode 100644 index 0000000000000000000000000000000000000000..7901aaa94cd743bee00589e8e8e65821d6829580 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/c34fe883-e62a-4a93-bd4f-1b6a7b854318.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef5de7fc64de19ce59e7afd4b8360fd0eafaba6ec9c6efd3a647a099fb67ccd1 +size 69862 diff --git a/.lancedb/model1_fixed.lance/data/c4781648-387f-4618-b4e8-d8fb379276e0.lance b/.lancedb/model1_fixed.lance/data/c4781648-387f-4618-b4e8-d8fb379276e0.lance new file mode 100644 index 0000000000000000000000000000000000000000..e5ab978a889874017402cb163300e40fc7147790 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/c4781648-387f-4618-b4e8-d8fb379276e0.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5ec00b4b76023e1acb919b072b9dda35bb3f192088bd9fe64c3efaa4fc9446d +size 71610 diff --git a/.lancedb/model1_fixed.lance/data/c654add8-49fc-495b-b509-92c6fa6f339f.lance b/.lancedb/model1_fixed.lance/data/c654add8-49fc-495b-b509-92c6fa6f339f.lance new file mode 100644 index 0000000000000000000000000000000000000000..0388f6c32e99446eb66d5357c288ee98e8b44d6e --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/c654add8-49fc-495b-b509-92c6fa6f339f.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3002f58972fa5ea8cb9cf1cc04727970f7568483632d35829dd4592277f49424 +size 60942 diff --git a/.lancedb/model1_fixed.lance/data/c71d56c6-d004-4295-b13f-dc89afded639.lance b/.lancedb/model1_fixed.lance/data/c71d56c6-d004-4295-b13f-dc89afded639.lance new file mode 100644 index 0000000000000000000000000000000000000000..697022d5e7e2679a3bc2526183918e0224574858 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/c71d56c6-d004-4295-b13f-dc89afded639.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5858a7179a6055df34e144cb90cbd0a1a25ab17bc8d5729afa70f7f7d0975a3 +size 73240 diff --git a/.lancedb/model1_fixed.lance/data/c809f477-7d70-4b82-b9ed-2bff9cf9a8a5.lance b/.lancedb/model1_fixed.lance/data/c809f477-7d70-4b82-b9ed-2bff9cf9a8a5.lance new file mode 100644 index 0000000000000000000000000000000000000000..477f2fb4b489ae3afc1f02c769689a50a4bec18f --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/c809f477-7d70-4b82-b9ed-2bff9cf9a8a5.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6f3f967c77d08f1944353fa7660b120b809bce94c2c7946dbc9ca6083f5f451 
+size 68510 diff --git a/.lancedb/model1_fixed.lance/data/c8e26cce-385a-484b-9bbd-27e4f930f376.lance b/.lancedb/model1_fixed.lance/data/c8e26cce-385a-484b-9bbd-27e4f930f376.lance new file mode 100644 index 0000000000000000000000000000000000000000..4da14c3f28106c81b89a7d2d8b5255833cd2861e --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/c8e26cce-385a-484b-9bbd-27e4f930f376.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4fa9c362701737471acd7d45b5a1d293e98ec89ba9f3ad725abd48d8e1565b8 +size 68721 diff --git a/.lancedb/model1_fixed.lance/data/c9c1ed88-f2d7-4749-8867-7d057c159055.lance b/.lancedb/model1_fixed.lance/data/c9c1ed88-f2d7-4749-8867-7d057c159055.lance new file mode 100644 index 0000000000000000000000000000000000000000..b9fecb0e5e6cccaf4d12540ff482b6e951f69c6a --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/c9c1ed88-f2d7-4749-8867-7d057c159055.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9d9864f030b424f7bef169f3d11151ce250f78661edcb78f1b7a0954da57ab3 +size 66874 diff --git a/.lancedb/model1_fixed.lance/data/caeaa499-30c1-4fb6-952a-afb2fb04bc1d.lance b/.lancedb/model1_fixed.lance/data/caeaa499-30c1-4fb6-952a-afb2fb04bc1d.lance new file mode 100644 index 0000000000000000000000000000000000000000..d089688bd9b0d077f305329dee226d762b765599 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/caeaa499-30c1-4fb6-952a-afb2fb04bc1d.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bda63a3b23c56ba8d73c1a7de59fe947da6d2671c7ed8d6da45a44eaa42e5679 +size 66011 diff --git a/.lancedb/model1_fixed.lance/data/cb8ed26d-637f-4a5b-8992-2d08da15a89d.lance b/.lancedb/model1_fixed.lance/data/cb8ed26d-637f-4a5b-8992-2d08da15a89d.lance new file mode 100644 index 0000000000000000000000000000000000000000..2b86c99de23564be34f0956ccf037132e3947a06 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/cb8ed26d-637f-4a5b-8992-2d08da15a89d.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:460ea6bd833c4ebfb74480c012725ef0ec20bccbb22877f343bacbad94b2315b +size 69710 diff --git a/.lancedb/model1_fixed.lance/data/ccf5a3f9-5154-4bc4-b20c-b45689d210be.lance b/.lancedb/model1_fixed.lance/data/ccf5a3f9-5154-4bc4-b20c-b45689d210be.lance new file mode 100644 index 0000000000000000000000000000000000000000..c2c46686aae15113ee928c02f64c6ef567e8fbcb --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/ccf5a3f9-5154-4bc4-b20c-b45689d210be.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:668172380ba2cbb9890a0fed1f656bd2f13002b45147a015539afc04a7e36158 +size 72013 diff --git a/.lancedb/model1_fixed.lance/data/cf0ca7bb-8158-4fc5-931d-1c7b802ab3c1.lance b/.lancedb/model1_fixed.lance/data/cf0ca7bb-8158-4fc5-931d-1c7b802ab3c1.lance new file mode 100644 index 0000000000000000000000000000000000000000..47bfbb10f456e7ea417a0f3619ca27599566786b --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/cf0ca7bb-8158-4fc5-931d-1c7b802ab3c1.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89ce880b4ef0762d9914c32dcebea3c22666d36163b2494b00ce3983a88139fb +size 68109 diff --git a/.lancedb/model1_fixed.lance/data/cf4ffa50-a71b-4602-a140-12acabda491b.lance b/.lancedb/model1_fixed.lance/data/cf4ffa50-a71b-4602-a140-12acabda491b.lance new file mode 100644 index 0000000000000000000000000000000000000000..6bee5d37f7140e5abd328722663aa2fc13700960 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/cf4ffa50-a71b-4602-a140-12acabda491b.lance @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:c98dbea34e82094727b6c8b43d28e3cb606d8987edeef3ff6c23a667b25814e8 +size 78692 diff --git a/.lancedb/model1_fixed.lance/data/cfd970ca-cf95-4781-8a33-894028be98a0.lance b/.lancedb/model1_fixed.lance/data/cfd970ca-cf95-4781-8a33-894028be98a0.lance new file mode 100644 index 0000000000000000000000000000000000000000..658e20ca8e4af88c8a60936b4f107e80ef2461de --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/cfd970ca-cf95-4781-8a33-894028be98a0.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e2c2820af91f4483403f8515ea43ff0b811391aa0d0a9b47ac751e10c66c590 +size 69224 diff --git a/.lancedb/model1_fixed.lance/data/d1dd23c8-2951-4d2c-b45a-92b66f68013a.lance b/.lancedb/model1_fixed.lance/data/d1dd23c8-2951-4d2c-b45a-92b66f68013a.lance new file mode 100644 index 0000000000000000000000000000000000000000..2b4b3e08b32ae869d77d634c9403c0c2ad59ff8e --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/d1dd23c8-2951-4d2c-b45a-92b66f68013a.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f46d2da722c4864228fc39013bff93540b1d1ba3625d6ef33570a418fd3ca4ab +size 65256 diff --git a/.lancedb/model1_fixed.lance/data/d3b30b6c-a34f-49ac-b225-d03cfa2a6632.lance b/.lancedb/model1_fixed.lance/data/d3b30b6c-a34f-49ac-b225-d03cfa2a6632.lance new file mode 100644 index 0000000000000000000000000000000000000000..4423c475a6bebc5e7a4c86644af287858ea93080 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/d3b30b6c-a34f-49ac-b225-d03cfa2a6632.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc98969671a17ea177cec5831832021d2cd5b0f26a9a2f3c23eb1519f11c2787 +size 67607 diff --git a/.lancedb/model1_fixed.lance/data/d445c93d-ba7c-48c1-8fa0-88e022a18165.lance b/.lancedb/model1_fixed.lance/data/d445c93d-ba7c-48c1-8fa0-88e022a18165.lance new file mode 100644 index 0000000000000000000000000000000000000000..10ceed77de2f31dd1c0ad7b21389f6adb62f4ea4 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/d445c93d-ba7c-48c1-8fa0-88e022a18165.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61be8e219b80f9b826620d4c240c7998ea60ccd026659f1e42abe288e8fd864a +size 66429 diff --git a/.lancedb/model1_fixed.lance/data/d8129d48-3932-478f-8ef8-75d3b567d39a.lance b/.lancedb/model1_fixed.lance/data/d8129d48-3932-478f-8ef8-75d3b567d39a.lance new file mode 100644 index 0000000000000000000000000000000000000000..a7036020a6d7853aa5f3b8d57d7f2b03c9d44b6f --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/d8129d48-3932-478f-8ef8-75d3b567d39a.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70e69347b582c1f8beef3ed2d3bea931501f96c6c80341387fb3e61c70a8b0a5 +size 62740 diff --git a/.lancedb/model1_fixed.lance/data/da1b6387-9614-4f12-b39b-2764a926e8bf.lance b/.lancedb/model1_fixed.lance/data/da1b6387-9614-4f12-b39b-2764a926e8bf.lance new file mode 100644 index 0000000000000000000000000000000000000000..e37a871fc0f1c0b5762825b670de584ceea157f2 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/da1b6387-9614-4f12-b39b-2764a926e8bf.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bfc745bca5e0773cb9800c0e51b292b137a68b68210da02331835b49a09ac0b +size 69767 diff --git a/.lancedb/model1_fixed.lance/data/da8a5f57-9ba6-49db-a1fa-9fa3568fbcf4.lance b/.lancedb/model1_fixed.lance/data/da8a5f57-9ba6-49db-a1fa-9fa3568fbcf4.lance new file mode 100644 index 0000000000000000000000000000000000000000..824c39e2425165bf5a0638ab2c948487555c37ae --- /dev/null +++ 
b/.lancedb/model1_fixed.lance/data/da8a5f57-9ba6-49db-a1fa-9fa3568fbcf4.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aea1b39237fcb6dd11e9279134df3d0477bcac6c3fadd4904e2c94144618a524 +size 68852 diff --git a/.lancedb/model1_fixed.lance/data/dbb0f714-4647-4269-8ff0-b6db68a0eaee.lance b/.lancedb/model1_fixed.lance/data/dbb0f714-4647-4269-8ff0-b6db68a0eaee.lance new file mode 100644 index 0000000000000000000000000000000000000000..b6d974b8deac7408e6e3714a5d9669e5463c8c2a --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/dbb0f714-4647-4269-8ff0-b6db68a0eaee.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c81bf8db79c126a6ce36512d38ef8c266b87060bd03b7f709f75f7dfe7246bca +size 70602 diff --git a/.lancedb/model1_fixed.lance/data/dc2bef7f-363a-4b33-860d-5e5f6ef7421e.lance b/.lancedb/model1_fixed.lance/data/dc2bef7f-363a-4b33-860d-5e5f6ef7421e.lance new file mode 100644 index 0000000000000000000000000000000000000000..d067046827b9832919bf990f8eea33c3a7173812 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/dc2bef7f-363a-4b33-860d-5e5f6ef7421e.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fde2d2f11f72b71a9a1e8eeb2d2e93481e15734f0cd241afd032352798aedca +size 80885 diff --git a/.lancedb/model1_fixed.lance/data/e10e2c0d-ddd8-4f00-b450-4e580b081c2c.lance b/.lancedb/model1_fixed.lance/data/e10e2c0d-ddd8-4f00-b450-4e580b081c2c.lance new file mode 100644 index 0000000000000000000000000000000000000000..0ef2a00cad4ab83c290e8aeeb74b477fc3b9d247 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/e10e2c0d-ddd8-4f00-b450-4e580b081c2c.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b911d033ec12ca24c9c63619b9f2807cca1443fbabecefd5ff24ff3c01856963 +size 64430 diff --git a/.lancedb/model1_fixed.lance/data/eb71cd1a-de1b-447a-a524-5ee8de6f0a8d.lance b/.lancedb/model1_fixed.lance/data/eb71cd1a-de1b-447a-a524-5ee8de6f0a8d.lance new file mode 100644 index 0000000000000000000000000000000000000000..56dc0a6f6a6ed106f679a9715c0e8468fa92856d --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/eb71cd1a-de1b-447a-a524-5ee8de6f0a8d.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f6091138130c390114b9112ccee6b2af4265ae36ec35cc49814e9fa591abe2f +size 71444 diff --git a/.lancedb/model1_fixed.lance/data/ef7d25fc-74ad-46b0-a088-c3997ff0126b.lance b/.lancedb/model1_fixed.lance/data/ef7d25fc-74ad-46b0-a088-c3997ff0126b.lance new file mode 100644 index 0000000000000000000000000000000000000000..dc43a80be7247cd7c1553e21424783e07a75ed1e --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/ef7d25fc-74ad-46b0-a088-c3997ff0126b.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:825b7b62259f11aa04647fb9b6a2123c3d2ac66fdc904f309ea8f7860715b7aa +size 67238 diff --git a/.lancedb/model1_fixed.lance/data/f05f5cc4-c4f6-4121-8778-d871601bb248.lance b/.lancedb/model1_fixed.lance/data/f05f5cc4-c4f6-4121-8778-d871601bb248.lance new file mode 100644 index 0000000000000000000000000000000000000000..eb8628f301497d67e186aa5e4d2734ddfba58a52 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/f05f5cc4-c4f6-4121-8778-d871601bb248.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edecd337918e8424d0e093c4487159875462cd6e71c99e82363623e6d8afdf2e +size 66950 diff --git a/.lancedb/model1_fixed.lance/data/f096c38c-400d-4eb8-9e9c-5acb45a5b807.lance b/.lancedb/model1_fixed.lance/data/f096c38c-400d-4eb8-9e9c-5acb45a5b807.lance new file mode 100644 index 
0000000000000000000000000000000000000000..7e49a65e2056e94862a8e082c625963a6794f07c --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/f096c38c-400d-4eb8-9e9c-5acb45a5b807.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bfa1351eae6303fd705a3276e7dd3d832beb4f88173016ac52e11af3dacdbbf +size 75155 diff --git a/.lancedb/model1_fixed.lance/data/f19f0429-5830-468c-9fe7-c614e71a5f45.lance b/.lancedb/model1_fixed.lance/data/f19f0429-5830-468c-9fe7-c614e71a5f45.lance new file mode 100644 index 0000000000000000000000000000000000000000..fb1527160c366be78e347a65177c50f35865cde9 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/f19f0429-5830-468c-9fe7-c614e71a5f45.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb6ec52985fef699be65a3ed442d88169ea75f9eae35e364915586c65c6242cf +size 67639 diff --git a/.lancedb/model1_fixed.lance/data/f1d4ebc7-55c3-4b18-9504-cd96cc6bb014.lance b/.lancedb/model1_fixed.lance/data/f1d4ebc7-55c3-4b18-9504-cd96cc6bb014.lance new file mode 100644 index 0000000000000000000000000000000000000000..94c718b77a2ba9161df4ecb5414e478cb099e73c --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/f1d4ebc7-55c3-4b18-9504-cd96cc6bb014.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5d84b5c0b687cfe38edb3fcbe51d209a375f5d04365929b3a59de97e5431e0f +size 69518 diff --git a/.lancedb/model1_fixed.lance/data/f32c168b-f475-4d86-9bf5-207c2a0ca22a.lance b/.lancedb/model1_fixed.lance/data/f32c168b-f475-4d86-9bf5-207c2a0ca22a.lance new file mode 100644 index 0000000000000000000000000000000000000000..e32619e9a9e99ed9de693ef5d4358cd7fb072774 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/f32c168b-f475-4d86-9bf5-207c2a0ca22a.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dd47fbd27ec3c4066c349e8caaf0b27807f7afb9a0b68bae8120de4bb5c03a4 +size 75499 diff --git a/.lancedb/model1_fixed.lance/data/f3f304ef-d524-42ab-97cf-367d56c8909a.lance b/.lancedb/model1_fixed.lance/data/f3f304ef-d524-42ab-97cf-367d56c8909a.lance new file mode 100644 index 0000000000000000000000000000000000000000..cc8e578afef52049866bd5995b8c48e1c1abd315 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/f3f304ef-d524-42ab-97cf-367d56c8909a.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e4ddefad84f79bea18d0d5782375a1c9049cb0b6f7c5335fa520c762ac0d85d +size 76279 diff --git a/.lancedb/model1_fixed.lance/data/f57eb9ef-69d9-4fee-a63c-56808df543e5.lance b/.lancedb/model1_fixed.lance/data/f57eb9ef-69d9-4fee-a63c-56808df543e5.lance new file mode 100644 index 0000000000000000000000000000000000000000..5aecf4efc29fa6a111d806b66059033fd89e02a2 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/f57eb9ef-69d9-4fee-a63c-56808df543e5.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85c29039d6cfbd36ae7a3e7b58e59c3f88fc9779c8633a99e15720179981f6ca +size 69465 diff --git a/.lancedb/model1_fixed.lance/data/f5a4def2-dcad-4d6e-bdef-89ea346ad2f9.lance b/.lancedb/model1_fixed.lance/data/f5a4def2-dcad-4d6e-bdef-89ea346ad2f9.lance new file mode 100644 index 0000000000000000000000000000000000000000..683980097f9ba3008682f0c36dd2b38a26996e51 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/f5a4def2-dcad-4d6e-bdef-89ea346ad2f9.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:155fe8423e4a9de9ec0a296ba5bb6db65bab43ee35cbe1f810aaa50ed5242cee +size 71628 diff --git a/.lancedb/model1_fixed.lance/data/f6bd3719-ee16-4756-ab78-cc9e0dc5e838.lance 
b/.lancedb/model1_fixed.lance/data/f6bd3719-ee16-4756-ab78-cc9e0dc5e838.lance new file mode 100644 index 0000000000000000000000000000000000000000..59347d02023c2205da9597a48625d43309bf125f --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/f6bd3719-ee16-4756-ab78-cc9e0dc5e838.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b992aa93ec9fab51d4ca663ef421b04a05a3612d490c00ab6d5347ff8956d6bb +size 69981 diff --git a/.lancedb/model1_fixed.lance/data/fb0a260d-8a4b-4a59-aec8-df37f7190dec.lance b/.lancedb/model1_fixed.lance/data/fb0a260d-8a4b-4a59-aec8-df37f7190dec.lance new file mode 100644 index 0000000000000000000000000000000000000000..2f846baa44245b113e91e8faf3521ec49009cad2 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/fb0a260d-8a4b-4a59-aec8-df37f7190dec.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05eb812dd06bbd5427df761601afa5c67a25f94e3dfc4142dc59a9b4c40a76f1 +size 69312 diff --git a/.lancedb/model1_fixed.lance/data/fc5ca0bf-0cf1-40fe-9a0d-9802c7f01941.lance b/.lancedb/model1_fixed.lance/data/fc5ca0bf-0cf1-40fe-9a0d-9802c7f01941.lance new file mode 100644 index 0000000000000000000000000000000000000000..e09b22f1026de2fcbe1761c70777f6b1fb5d300d --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/fc5ca0bf-0cf1-40fe-9a0d-9802c7f01941.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:614fb27367544bf83b20e5b102c8a1ffc7029da530ea307c5c97b280115e7207 +size 74979 diff --git a/.lancedb/model1_fixed.lance/data/fcb544d3-71aa-4323-b940-a8207194077e.lance b/.lancedb/model1_fixed.lance/data/fcb544d3-71aa-4323-b940-a8207194077e.lance new file mode 100644 index 0000000000000000000000000000000000000000..ce4ddfcef48e50429fe0746e23011a825c725631 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/fcb544d3-71aa-4323-b940-a8207194077e.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84d2dac0f1cf04b1e20853ea3d661f64b2bd84665a0d0f4e3b09969b09cc1d7d +size 74789 diff --git a/.lancedb/model1_fixed.lance/data/fd02a3c6-63b7-4497-817c-aa472d1257ec.lance b/.lancedb/model1_fixed.lance/data/fd02a3c6-63b7-4497-817c-aa472d1257ec.lance new file mode 100644 index 0000000000000000000000000000000000000000..c82c03f1ae58bd3e8b5554f74fea7b91a1164a36 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/fd02a3c6-63b7-4497-817c-aa472d1257ec.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53b53a4aca310aad21d2424638a60a0dd6cbf845f80a07d38b4909b7f10494ce +size 69576 diff --git a/.lancedb/model1_fixed.lance/data/fefaac08-0a30-424c-98a6-ea62ccc31fc1.lance b/.lancedb/model1_fixed.lance/data/fefaac08-0a30-424c-98a6-ea62ccc31fc1.lance new file mode 100644 index 0000000000000000000000000000000000000000..f4dd8eb7bc542ce6983b547fc5255cfbac24bb06 --- /dev/null +++ b/.lancedb/model1_fixed.lance/data/fefaac08-0a30-424c-98a6-ea62ccc31fc1.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65f0b683c2d503092ca8f29bad7606674c15f31f5340b8c1174e07514d439ece +size 75031 diff --git a/.lancedb/model2_fixed.lance/_indices/08fb936e-0ad7-43c6-a23b-7a14dff46c92/index.idx b/.lancedb/model2_fixed.lance/_indices/08fb936e-0ad7-43c6-a23b-7a14dff46c92/index.idx new file mode 100644 index 0000000000000000000000000000000000000000..a46a71fbcd7062ba8a428cf60af36c2a21af39ca --- /dev/null +++ b/.lancedb/model2_fixed.lance/_indices/08fb936e-0ad7-43c6-a23b-7a14dff46c92/index.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:58c3561543bf06fd902551a1372a985a5f611dbc734e355f3dd5b500f8ba4bae +size 2465067 diff --git a/.lancedb/model2_fixed.lance/_transactions/0-f0d81612-4f43-4070-81ad-180a5550aa70.txn b/.lancedb/model2_fixed.lance/_transactions/0-f0d81612-4f43-4070-81ad-180a5550aa70.txn new file mode 100644 index 0000000000000000000000000000000000000000..2fd630bf3f93930a0e00ed1cf209ba418f1eb3d2 --- /dev/null +++ b/.lancedb/model2_fixed.lance/_transactions/0-f0d81612-4f43-4070-81ad-180a5550aa70.txn @@ -0,0 +1 @@ +$f0d81612-4f43-4070-81ad-180a5550aa70²V3vector ÿÿÿÿÿÿÿÿÿ*fixed_size_list:float:102408text ÿÿÿÿÿÿÿÿÿ*string08 \ No newline at end of file diff --git a/.lancedb/model2_fixed.lance/_transactions/1-2adc2827-4ff7-4dca-a04d-622c18272e3e.txn b/.lancedb/model2_fixed.lance/_transactions/1-2adc2827-4ff7-4dca-a04d-622c18272e3e.txn new file mode 100644 index 0000000000000000000000000000000000000000..cce8920e85419c66cb358b7e5e4ffcfaa75315c9 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/1-2adc2827-4ff7-4dca-a04d-622c18272e3e.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/10-d4ecb86e-a4fe-4946-b832-5c5ce91d3925.txn b/.lancedb/model2_fixed.lance/_transactions/10-d4ecb86e-a4fe-4946-b832-5c5ce91d3925.txn new file mode 100644 index 0000000000000000000000000000000000000000..b8c114b05596fad1e23dfe8179c7e0a2e5552d8b Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/10-d4ecb86e-a4fe-4946-b832-5c5ce91d3925.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/11-c171eb9b-b867-41a9-956e-9a94a8a8a0cc.txn b/.lancedb/model2_fixed.lance/_transactions/11-c171eb9b-b867-41a9-956e-9a94a8a8a0cc.txn new file mode 100644 index 0000000000000000000000000000000000000000..1e64a6b566cad435bbbe8cec2c49a3997d67d928 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/11-c171eb9b-b867-41a9-956e-9a94a8a8a0cc.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/12-f97a00a6-1335-43bc-9aad-392d96c01566.txn b/.lancedb/model2_fixed.lance/_transactions/12-f97a00a6-1335-43bc-9aad-392d96c01566.txn new file mode 100644 index 0000000000000000000000000000000000000000..b9d3f7854333d1b61d40287e64a5ce050af0b280 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/12-f97a00a6-1335-43bc-9aad-392d96c01566.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/13-064bf244-6c0c-4bec-a912-c9a8729c14e4.txn b/.lancedb/model2_fixed.lance/_transactions/13-064bf244-6c0c-4bec-a912-c9a8729c14e4.txn new file mode 100644 index 0000000000000000000000000000000000000000..076cd196d93ae458f2e12766a80eed58a1e13e5a Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/13-064bf244-6c0c-4bec-a912-c9a8729c14e4.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/14-3fcf6348-0884-433f-8ccd-878c7e3c0c60.txn b/.lancedb/model2_fixed.lance/_transactions/14-3fcf6348-0884-433f-8ccd-878c7e3c0c60.txn new file mode 100644 index 0000000000000000000000000000000000000000..8359dc9d32f29e3548f5efc5122cead0c61f11dd Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/14-3fcf6348-0884-433f-8ccd-878c7e3c0c60.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/15-4e90313a-6015-4461-b0ee-8dfedbb22d74.txn b/.lancedb/model2_fixed.lance/_transactions/15-4e90313a-6015-4461-b0ee-8dfedbb22d74.txn new file mode 100644 index 0000000000000000000000000000000000000000..9aef83a8c0bc6ceb6f7c7974dede68dadde38361 Binary files /dev/null and 
b/.lancedb/model2_fixed.lance/_transactions/15-4e90313a-6015-4461-b0ee-8dfedbb22d74.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/16-d26686f1-9c56-41f3-883d-9dbcb60a226c.txn b/.lancedb/model2_fixed.lance/_transactions/16-d26686f1-9c56-41f3-883d-9dbcb60a226c.txn new file mode 100644 index 0000000000000000000000000000000000000000..b195d32688a25eaf6d34b3083529c0317e9a82e3 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/16-d26686f1-9c56-41f3-883d-9dbcb60a226c.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/17-13aeb23e-1bb8-4b00-96f9-728ef322ec0c.txn b/.lancedb/model2_fixed.lance/_transactions/17-13aeb23e-1bb8-4b00-96f9-728ef322ec0c.txn new file mode 100644 index 0000000000000000000000000000000000000000..4f24b408d0f4e8607a8b864ea385eff607406db9 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/17-13aeb23e-1bb8-4b00-96f9-728ef322ec0c.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/18-cea7bf3f-121c-4321-8c76-10c78048b82f.txn b/.lancedb/model2_fixed.lance/_transactions/18-cea7bf3f-121c-4321-8c76-10c78048b82f.txn new file mode 100644 index 0000000000000000000000000000000000000000..392e3e0c55d7bb68961bdb0824edbd77370a2805 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/18-cea7bf3f-121c-4321-8c76-10c78048b82f.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/19-5674d17a-69fd-4efe-9068-38eb29b3971f.txn b/.lancedb/model2_fixed.lance/_transactions/19-5674d17a-69fd-4efe-9068-38eb29b3971f.txn new file mode 100644 index 0000000000000000000000000000000000000000..e098a108a4e914ded9e80ff58e1f275d7967d1d3 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/19-5674d17a-69fd-4efe-9068-38eb29b3971f.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/2-d77a7756-e318-4394-8e91-b3d85393ea42.txn b/.lancedb/model2_fixed.lance/_transactions/2-d77a7756-e318-4394-8e91-b3d85393ea42.txn new file mode 100644 index 0000000000000000000000000000000000000000..c5ade51c14ed05d6c474dc51fdc44d53fe4b910d Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/2-d77a7756-e318-4394-8e91-b3d85393ea42.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/20-9cd01007-0051-40f0-b2b8-50aad684ead7.txn b/.lancedb/model2_fixed.lance/_transactions/20-9cd01007-0051-40f0-b2b8-50aad684ead7.txn new file mode 100644 index 0000000000000000000000000000000000000000..e0cf9d794fc41e0b438ceaeebde89decfd1c38a6 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/20-9cd01007-0051-40f0-b2b8-50aad684ead7.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/21-c5d35c0c-e5e7-4f83-941b-fe58b2924c6a.txn b/.lancedb/model2_fixed.lance/_transactions/21-c5d35c0c-e5e7-4f83-941b-fe58b2924c6a.txn new file mode 100644 index 0000000000000000000000000000000000000000..b4e71e4b3e542336462c5445fdcdf2b5ce7f4a21 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/21-c5d35c0c-e5e7-4f83-941b-fe58b2924c6a.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/22-bd3cc570-f7bb-4087-90e2-d95b7c9ec56f.txn b/.lancedb/model2_fixed.lance/_transactions/22-bd3cc570-f7bb-4087-90e2-d95b7c9ec56f.txn new file mode 100644 index 0000000000000000000000000000000000000000..7443a3ae911d8640a0270a29c85c0885dc00bb9e Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/22-bd3cc570-f7bb-4087-90e2-d95b7c9ec56f.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/23-ff0a43ff-c113-408a-afbb-340e6faaa8f1.txn 
b/.lancedb/model2_fixed.lance/_transactions/23-ff0a43ff-c113-408a-afbb-340e6faaa8f1.txn new file mode 100644 index 0000000000000000000000000000000000000000..44edf626ebe54d93b0374f1aa209114977f48c7e Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/23-ff0a43ff-c113-408a-afbb-340e6faaa8f1.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/24-431e3571-792a-43bb-8f16-215f13c6ab7c.txn b/.lancedb/model2_fixed.lance/_transactions/24-431e3571-792a-43bb-8f16-215f13c6ab7c.txn new file mode 100644 index 0000000000000000000000000000000000000000..e8d27b4e34855ff998884e5bee476ad695630b58 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/24-431e3571-792a-43bb-8f16-215f13c6ab7c.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/25-c7a1379b-7048-4f75-9e4a-6186eed9749b.txn b/.lancedb/model2_fixed.lance/_transactions/25-c7a1379b-7048-4f75-9e4a-6186eed9749b.txn new file mode 100644 index 0000000000000000000000000000000000000000..c40014eb380923a7ef0e9f9b745eb77cf312972e Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/25-c7a1379b-7048-4f75-9e4a-6186eed9749b.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/26-264550d9-6424-4a9d-be6c-85d2aedfd57c.txn b/.lancedb/model2_fixed.lance/_transactions/26-264550d9-6424-4a9d-be6c-85d2aedfd57c.txn new file mode 100644 index 0000000000000000000000000000000000000000..3ca9650ed1db5a03f507c483a22b9ab87db9d547 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/26-264550d9-6424-4a9d-be6c-85d2aedfd57c.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/27-122558a2-1bcd-4a57-bc74-2ee9c34cdfe1.txn b/.lancedb/model2_fixed.lance/_transactions/27-122558a2-1bcd-4a57-bc74-2ee9c34cdfe1.txn new file mode 100644 index 0000000000000000000000000000000000000000..83605212182bb7f8f6960632a0bfc48c5ac26575 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/27-122558a2-1bcd-4a57-bc74-2ee9c34cdfe1.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/28-5b48f79f-8d5a-45b9-82f8-e78a0edf50b4.txn b/.lancedb/model2_fixed.lance/_transactions/28-5b48f79f-8d5a-45b9-82f8-e78a0edf50b4.txn new file mode 100644 index 0000000000000000000000000000000000000000..9be05e7fba8a8cea7797cc650df56511d6ef3423 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/28-5b48f79f-8d5a-45b9-82f8-e78a0edf50b4.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/29-bea9f2f0-be37-46e4-ac85-0f9fa1ef8b5b.txn b/.lancedb/model2_fixed.lance/_transactions/29-bea9f2f0-be37-46e4-ac85-0f9fa1ef8b5b.txn new file mode 100644 index 0000000000000000000000000000000000000000..47345a8f9b74278e795b4a09812d3d2ff40f77a1 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/29-bea9f2f0-be37-46e4-ac85-0f9fa1ef8b5b.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/3-57a56a7f-1132-4e74-a5e0-c074fa9f35c9.txn b/.lancedb/model2_fixed.lance/_transactions/3-57a56a7f-1132-4e74-a5e0-c074fa9f35c9.txn new file mode 100644 index 0000000000000000000000000000000000000000..c709b7f300a278f9b4ae25a8a973471654d64a5d Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/3-57a56a7f-1132-4e74-a5e0-c074fa9f35c9.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/30-8a9d40bc-8ced-473f-8b47-0672ad9fd42b.txn b/.lancedb/model2_fixed.lance/_transactions/30-8a9d40bc-8ced-473f-8b47-0672ad9fd42b.txn new file mode 100644 index 0000000000000000000000000000000000000000..4795016d69b6eb4b81d4b415748cfa405e6e4369 Binary 
files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/30-8a9d40bc-8ced-473f-8b47-0672ad9fd42b.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/31-91be0c46-4b5a-4387-b2de-b355b64dcc94.txn b/.lancedb/model2_fixed.lance/_transactions/31-91be0c46-4b5a-4387-b2de-b355b64dcc94.txn new file mode 100644 index 0000000000000000000000000000000000000000..e5626f4e7c453df3e3a09a176e8744bc52ed204f Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/31-91be0c46-4b5a-4387-b2de-b355b64dcc94.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/32-bebd8a09-0db3-4240-b34c-d97e83af1dcb.txn b/.lancedb/model2_fixed.lance/_transactions/32-bebd8a09-0db3-4240-b34c-d97e83af1dcb.txn new file mode 100644 index 0000000000000000000000000000000000000000..ecd5fa47db41d6ed4b21d7177bb5fb0bcc327eb0 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/32-bebd8a09-0db3-4240-b34c-d97e83af1dcb.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/33-a0bd2901-855d-4c62-934a-80a23d4005df.txn b/.lancedb/model2_fixed.lance/_transactions/33-a0bd2901-855d-4c62-934a-80a23d4005df.txn new file mode 100644 index 0000000000000000000000000000000000000000..b331d2fbb195fb4343806566c243eace9b41812e Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/33-a0bd2901-855d-4c62-934a-80a23d4005df.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/34-11f713d3-cf64-4dee-a477-8bd0974233c0.txn b/.lancedb/model2_fixed.lance/_transactions/34-11f713d3-cf64-4dee-a477-8bd0974233c0.txn new file mode 100644 index 0000000000000000000000000000000000000000..89165b3331e7a18b294932449dddeedbb1f547e0 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/34-11f713d3-cf64-4dee-a477-8bd0974233c0.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/35-572a3b76-5ae3-4493-8032-d9bfad287c70.txn b/.lancedb/model2_fixed.lance/_transactions/35-572a3b76-5ae3-4493-8032-d9bfad287c70.txn new file mode 100644 index 0000000000000000000000000000000000000000..75f585b268c885270bfe5d1bcf1b44ae358f25f5 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/35-572a3b76-5ae3-4493-8032-d9bfad287c70.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/36-99495fee-c757-44fb-b86a-d00cd4fbca9f.txn b/.lancedb/model2_fixed.lance/_transactions/36-99495fee-c757-44fb-b86a-d00cd4fbca9f.txn new file mode 100644 index 0000000000000000000000000000000000000000..717ce678f1d9e4447337b1d951688970cc07e169 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/36-99495fee-c757-44fb-b86a-d00cd4fbca9f.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/37-5956fc67-b403-4174-907a-1b12e0765d3a.txn b/.lancedb/model2_fixed.lance/_transactions/37-5956fc67-b403-4174-907a-1b12e0765d3a.txn new file mode 100644 index 0000000000000000000000000000000000000000..1f823e3ed876d9977dd4d5b510f148db650d02f7 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/37-5956fc67-b403-4174-907a-1b12e0765d3a.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/38-89ab4c24-6111-46ef-864f-b908fcf5bffd.txn b/.lancedb/model2_fixed.lance/_transactions/38-89ab4c24-6111-46ef-864f-b908fcf5bffd.txn new file mode 100644 index 0000000000000000000000000000000000000000..68e9b0c08ba4ce000c795d6540a6b2adccd0c969 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/38-89ab4c24-6111-46ef-864f-b908fcf5bffd.txn differ diff --git 
a/.lancedb/model2_fixed.lance/_transactions/39-ed99c46b-aa9e-4f4d-8168-fed47cf19053.txn b/.lancedb/model2_fixed.lance/_transactions/39-ed99c46b-aa9e-4f4d-8168-fed47cf19053.txn new file mode 100644 index 0000000000000000000000000000000000000000..48e6757f339b43a3f0c2d13e3cdd47ffb37e718a Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/39-ed99c46b-aa9e-4f4d-8168-fed47cf19053.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/4-46939059-bca0-4cb8-85b4-40f273010672.txn b/.lancedb/model2_fixed.lance/_transactions/4-46939059-bca0-4cb8-85b4-40f273010672.txn new file mode 100644 index 0000000000000000000000000000000000000000..9403f517d542ec74cd156cd1cf8bcd63ba667bcc Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/4-46939059-bca0-4cb8-85b4-40f273010672.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/40-e7989ba0-7c18-4ec4-8df0-a5cbbb5e786c.txn b/.lancedb/model2_fixed.lance/_transactions/40-e7989ba0-7c18-4ec4-8df0-a5cbbb5e786c.txn new file mode 100644 index 0000000000000000000000000000000000000000..1ac4d4ab9a0c89b4d32fa83d14d64ab4aede5094 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/40-e7989ba0-7c18-4ec4-8df0-a5cbbb5e786c.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/41-d47ac13e-b25f-44d6-b72a-ada8c1bc1739.txn b/.lancedb/model2_fixed.lance/_transactions/41-d47ac13e-b25f-44d6-b72a-ada8c1bc1739.txn new file mode 100644 index 0000000000000000000000000000000000000000..7323938aafe9d73d615a52564e81b853aae0afa4 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/41-d47ac13e-b25f-44d6-b72a-ada8c1bc1739.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/42-2deb46fb-6dd9-4fd4-847c-9bb51759b5c5.txn b/.lancedb/model2_fixed.lance/_transactions/42-2deb46fb-6dd9-4fd4-847c-9bb51759b5c5.txn new file mode 100644 index 0000000000000000000000000000000000000000..cb1a3483a190cc74071f216b915963cc72ffb379 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/42-2deb46fb-6dd9-4fd4-847c-9bb51759b5c5.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/43-76871754-dfac-4929-a76f-86d7b5736e66.txn b/.lancedb/model2_fixed.lance/_transactions/43-76871754-dfac-4929-a76f-86d7b5736e66.txn new file mode 100644 index 0000000000000000000000000000000000000000..61231c4e75c25dc3821921e1fd7727bb0f443d95 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/43-76871754-dfac-4929-a76f-86d7b5736e66.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/44-e3b8317a-2ec7-4162-ae3e-35c45ac2b0da.txn b/.lancedb/model2_fixed.lance/_transactions/44-e3b8317a-2ec7-4162-ae3e-35c45ac2b0da.txn new file mode 100644 index 0000000000000000000000000000000000000000..dab9a55e47d8050cf5170a5086e64b423e3e754e Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/44-e3b8317a-2ec7-4162-ae3e-35c45ac2b0da.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/45-450be708-2830-4413-ae53-167f14e038b3.txn b/.lancedb/model2_fixed.lance/_transactions/45-450be708-2830-4413-ae53-167f14e038b3.txn new file mode 100644 index 0000000000000000000000000000000000000000..3362b46b362cdedbb7b85c9df3196c205ed57c47 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/45-450be708-2830-4413-ae53-167f14e038b3.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/46-6ff36b10-7c1e-4d6d-a9b8-483ecd5e7b2d.txn b/.lancedb/model2_fixed.lance/_transactions/46-6ff36b10-7c1e-4d6d-a9b8-483ecd5e7b2d.txn new file mode 100644 index 
0000000000000000000000000000000000000000..0861fdff420dce3601cf7a0defe84e79dd9085c4 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/46-6ff36b10-7c1e-4d6d-a9b8-483ecd5e7b2d.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/47-ba2ccc2b-ba05-42e9-a94f-d286206ce4e1.txn b/.lancedb/model2_fixed.lance/_transactions/47-ba2ccc2b-ba05-42e9-a94f-d286206ce4e1.txn new file mode 100644 index 0000000000000000000000000000000000000000..03854d5fbc5ce09337a62654df5d6ce8d3b3d9f3 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/47-ba2ccc2b-ba05-42e9-a94f-d286206ce4e1.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/48-37830432-f985-43df-97cf-406e9ecb0fa3.txn b/.lancedb/model2_fixed.lance/_transactions/48-37830432-f985-43df-97cf-406e9ecb0fa3.txn new file mode 100644 index 0000000000000000000000000000000000000000..6e9156203a30e4caaa3ddde9b8ccac2ee63ba265 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/48-37830432-f985-43df-97cf-406e9ecb0fa3.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/49-ab53b621-2ddf-4bcb-bd0b-bc1a038c760f.txn b/.lancedb/model2_fixed.lance/_transactions/49-ab53b621-2ddf-4bcb-bd0b-bc1a038c760f.txn new file mode 100644 index 0000000000000000000000000000000000000000..b4b093c23e57dacdbb04062f202fd3c347efdf10 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/49-ab53b621-2ddf-4bcb-bd0b-bc1a038c760f.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/5-e4907c5a-c28e-420b-8984-c5b0d175d0c7.txn b/.lancedb/model2_fixed.lance/_transactions/5-e4907c5a-c28e-420b-8984-c5b0d175d0c7.txn new file mode 100644 index 0000000000000000000000000000000000000000..8917ea27a0c73318d4f2f2084cd6a2602b6fbb69 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/5-e4907c5a-c28e-420b-8984-c5b0d175d0c7.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/50-0f24bb4d-d143-4c1d-aec9-378e1ded98e4.txn b/.lancedb/model2_fixed.lance/_transactions/50-0f24bb4d-d143-4c1d-aec9-378e1ded98e4.txn new file mode 100644 index 0000000000000000000000000000000000000000..27c649da72d4d724783bff638fc938288c7dda45 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/50-0f24bb4d-d143-4c1d-aec9-378e1ded98e4.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/51-a3ac1efd-77fd-416c-b2a5-c77fa92cf865.txn b/.lancedb/model2_fixed.lance/_transactions/51-a3ac1efd-77fd-416c-b2a5-c77fa92cf865.txn new file mode 100644 index 0000000000000000000000000000000000000000..a8ba42d00b9a402e4e40196c3ebed8da888ca6b6 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/51-a3ac1efd-77fd-416c-b2a5-c77fa92cf865.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/52-6252e633-deb9-4c80-a762-eb07b287df5f.txn b/.lancedb/model2_fixed.lance/_transactions/52-6252e633-deb9-4c80-a762-eb07b287df5f.txn new file mode 100644 index 0000000000000000000000000000000000000000..960ecdc80ba49bfad105b6b6f66c769130f35ea7 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/52-6252e633-deb9-4c80-a762-eb07b287df5f.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/53-9e559939-870f-44fe-a573-d1686e46c122.txn b/.lancedb/model2_fixed.lance/_transactions/53-9e559939-870f-44fe-a573-d1686e46c122.txn new file mode 100644 index 0000000000000000000000000000000000000000..c114a48ab96157f9e46ac13863c6de9c4b93bd4e Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/53-9e559939-870f-44fe-a573-d1686e46c122.txn differ 
diff --git a/.lancedb/model2_fixed.lance/_transactions/54-51f835a6-44c9-4774-918e-ac922fb6aa6f.txn b/.lancedb/model2_fixed.lance/_transactions/54-51f835a6-44c9-4774-918e-ac922fb6aa6f.txn new file mode 100644 index 0000000000000000000000000000000000000000..429fc3bc24a33446f12a48d8a388547ce6219aa7 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/54-51f835a6-44c9-4774-918e-ac922fb6aa6f.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/55-3cd2de4e-c144-49e0-b36d-3a42c3a31421.txn b/.lancedb/model2_fixed.lance/_transactions/55-3cd2de4e-c144-49e0-b36d-3a42c3a31421.txn new file mode 100644 index 0000000000000000000000000000000000000000..f1e8d6497e3f2f5f45e6b3459b5e02a4330e3bee Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/55-3cd2de4e-c144-49e0-b36d-3a42c3a31421.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/56-16c06f70-e806-452d-afd0-033e2a886d36.txn b/.lancedb/model2_fixed.lance/_transactions/56-16c06f70-e806-452d-afd0-033e2a886d36.txn new file mode 100644 index 0000000000000000000000000000000000000000..fdcc169be9a80dae5514e0bda03f0fd0f3ba5d39 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/56-16c06f70-e806-452d-afd0-033e2a886d36.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/57-f98fc71f-dcf0-4c40-ace5-09d9b8fc9b65.txn b/.lancedb/model2_fixed.lance/_transactions/57-f98fc71f-dcf0-4c40-ace5-09d9b8fc9b65.txn new file mode 100644 index 0000000000000000000000000000000000000000..5649af9bec0bbd5c2b502ec33393a3bd4c6f7598 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/57-f98fc71f-dcf0-4c40-ace5-09d9b8fc9b65.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/58-33111a43-05cc-4fcf-b5f6-4bcd79721a16.txn b/.lancedb/model2_fixed.lance/_transactions/58-33111a43-05cc-4fcf-b5f6-4bcd79721a16.txn new file mode 100644 index 0000000000000000000000000000000000000000..993ab99720ba15a70ddaee2f863358982fb6fed9 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/58-33111a43-05cc-4fcf-b5f6-4bcd79721a16.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/59-671e0bd9-bf61-4b5e-a7c9-fa650a8d38c3.txn b/.lancedb/model2_fixed.lance/_transactions/59-671e0bd9-bf61-4b5e-a7c9-fa650a8d38c3.txn new file mode 100644 index 0000000000000000000000000000000000000000..9a16a4f423113b53f72dc13806914f4ee54ee668 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/59-671e0bd9-bf61-4b5e-a7c9-fa650a8d38c3.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/6-3b5e33cf-9ce8-4a01-aed1-bd7c9283b8b4.txn b/.lancedb/model2_fixed.lance/_transactions/6-3b5e33cf-9ce8-4a01-aed1-bd7c9283b8b4.txn new file mode 100644 index 0000000000000000000000000000000000000000..c3238ae727f35be15e04df754bbfa2abca3a37ae Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/6-3b5e33cf-9ce8-4a01-aed1-bd7c9283b8b4.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/60-26c0d403-baff-481d-9088-cd1db84a42db.txn b/.lancedb/model2_fixed.lance/_transactions/60-26c0d403-baff-481d-9088-cd1db84a42db.txn new file mode 100644 index 0000000000000000000000000000000000000000..68fab9edc8a081f137dcc9e271ebf239fe1e39c9 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/60-26c0d403-baff-481d-9088-cd1db84a42db.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/61-5573ae6e-22ee-4599-8be8-93a48ab55824.txn b/.lancedb/model2_fixed.lance/_transactions/61-5573ae6e-22ee-4599-8be8-93a48ab55824.txn new file mode 
100644 index 0000000000000000000000000000000000000000..9c45350e97851c4d6763a763af52991702d9e65b Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/61-5573ae6e-22ee-4599-8be8-93a48ab55824.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/62-9714e52e-d079-4bc6-b52c-262578ea0cb0.txn b/.lancedb/model2_fixed.lance/_transactions/62-9714e52e-d079-4bc6-b52c-262578ea0cb0.txn new file mode 100644 index 0000000000000000000000000000000000000000..4de49af891d73617983b61b5d39ee033dbba2b18 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/62-9714e52e-d079-4bc6-b52c-262578ea0cb0.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/63-4f97eeeb-dad7-4398-bde3-e235e5f675d6.txn b/.lancedb/model2_fixed.lance/_transactions/63-4f97eeeb-dad7-4398-bde3-e235e5f675d6.txn new file mode 100644 index 0000000000000000000000000000000000000000..d1456ba5e6b71ac4262ec128281d8c36ee9de61e Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/63-4f97eeeb-dad7-4398-bde3-e235e5f675d6.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/64-1dd358ff-c42e-49ec-80d2-958fc28c807f.txn b/.lancedb/model2_fixed.lance/_transactions/64-1dd358ff-c42e-49ec-80d2-958fc28c807f.txn new file mode 100644 index 0000000000000000000000000000000000000000..3ceebd7332bcde72c180dcb2754afb2b7ae2f937 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/64-1dd358ff-c42e-49ec-80d2-958fc28c807f.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/65-4a105a73-4f42-4c09-8433-80cff77859d4.txn b/.lancedb/model2_fixed.lance/_transactions/65-4a105a73-4f42-4c09-8433-80cff77859d4.txn new file mode 100644 index 0000000000000000000000000000000000000000..87124a383dd336659348759a26a5ce3f629d0b59 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/65-4a105a73-4f42-4c09-8433-80cff77859d4.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/66-949564c2-9cc5-4eba-809c-001e21d4a204.txn b/.lancedb/model2_fixed.lance/_transactions/66-949564c2-9cc5-4eba-809c-001e21d4a204.txn new file mode 100644 index 0000000000000000000000000000000000000000..ebba4bcc197ae98b37d68553f57e47c9efc1c278 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/66-949564c2-9cc5-4eba-809c-001e21d4a204.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/67-62252bb1-9574-435e-912b-6f17a2769a1e.txn b/.lancedb/model2_fixed.lance/_transactions/67-62252bb1-9574-435e-912b-6f17a2769a1e.txn new file mode 100644 index 0000000000000000000000000000000000000000..e8bb3a08feace40c2b1748a2f23b230743ac3528 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/67-62252bb1-9574-435e-912b-6f17a2769a1e.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/68-6879c928-0b9d-4b31-b6be-ed6f437703df.txn b/.lancedb/model2_fixed.lance/_transactions/68-6879c928-0b9d-4b31-b6be-ed6f437703df.txn new file mode 100644 index 0000000000000000000000000000000000000000..bd75d9d2954493d9a0efd45fc6126cd05ff26d67 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/68-6879c928-0b9d-4b31-b6be-ed6f437703df.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/69-9bd3009c-404c-43a8-a7d2-90123ce6c93e.txn b/.lancedb/model2_fixed.lance/_transactions/69-9bd3009c-404c-43a8-a7d2-90123ce6c93e.txn new file mode 100644 index 0000000000000000000000000000000000000000..cd4f2bfa940b4192cf22046e5e7e4b69c7b8cb10 Binary files /dev/null and 
b/.lancedb/model2_fixed.lance/_transactions/69-9bd3009c-404c-43a8-a7d2-90123ce6c93e.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/7-820b6c85-9f1b-45b6-8dea-adf46ee3aaae.txn b/.lancedb/model2_fixed.lance/_transactions/7-820b6c85-9f1b-45b6-8dea-adf46ee3aaae.txn new file mode 100644 index 0000000000000000000000000000000000000000..801c38cc315cce73feac86778c821cecbebb98f0 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/7-820b6c85-9f1b-45b6-8dea-adf46ee3aaae.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/70-a9bee1d8-03ea-48a2-b868-cf43308d033a.txn b/.lancedb/model2_fixed.lance/_transactions/70-a9bee1d8-03ea-48a2-b868-cf43308d033a.txn new file mode 100644 index 0000000000000000000000000000000000000000..3741e38f8fccfc705e7ef1db86c65e02c087d069 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/70-a9bee1d8-03ea-48a2-b868-cf43308d033a.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/71-8ef1146f-2f59-47e6-892b-e79c0e7fb21b.txn b/.lancedb/model2_fixed.lance/_transactions/71-8ef1146f-2f59-47e6-892b-e79c0e7fb21b.txn new file mode 100644 index 0000000000000000000000000000000000000000..f6af011de34a180499d5632d9d42d9c80061c5c9 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/71-8ef1146f-2f59-47e6-892b-e79c0e7fb21b.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/72-ad8f7151-068c-4720-922f-0e6656144949.txn b/.lancedb/model2_fixed.lance/_transactions/72-ad8f7151-068c-4720-922f-0e6656144949.txn new file mode 100644 index 0000000000000000000000000000000000000000..02c763322bac37a72719279564dcea9ff26fd9ab Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/72-ad8f7151-068c-4720-922f-0e6656144949.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/73-e5db18bf-eff1-4bf8-9167-0917562f95be.txn b/.lancedb/model2_fixed.lance/_transactions/73-e5db18bf-eff1-4bf8-9167-0917562f95be.txn new file mode 100644 index 0000000000000000000000000000000000000000..f8fcbc09b763217af100d7152a9f0ceb1dca809a Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/73-e5db18bf-eff1-4bf8-9167-0917562f95be.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/74-c21db710-2890-4379-ae61-49a8eb5c6151.txn b/.lancedb/model2_fixed.lance/_transactions/74-c21db710-2890-4379-ae61-49a8eb5c6151.txn new file mode 100644 index 0000000000000000000000000000000000000000..c441b6340e2dc57d45ef8ea9f5bc875ae96b2a8b Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/74-c21db710-2890-4379-ae61-49a8eb5c6151.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/75-5712a435-664c-4099-be6d-2f8dce10158a.txn b/.lancedb/model2_fixed.lance/_transactions/75-5712a435-664c-4099-be6d-2f8dce10158a.txn new file mode 100644 index 0000000000000000000000000000000000000000..1bc8911dd685d98e3b40d3ee59cadf45e6ac30e4 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/75-5712a435-664c-4099-be6d-2f8dce10158a.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/76-a1626360-c3a4-4fc3-a6dd-135407428403.txn b/.lancedb/model2_fixed.lance/_transactions/76-a1626360-c3a4-4fc3-a6dd-135407428403.txn new file mode 100644 index 0000000000000000000000000000000000000000..c39058f5141425763f36bb3a4aed4d241663e0ab Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/76-a1626360-c3a4-4fc3-a6dd-135407428403.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/77-ce8a742b-eaea-48b8-b0d7-65d10be8c6ae.txn 
b/.lancedb/model2_fixed.lance/_transactions/77-ce8a742b-eaea-48b8-b0d7-65d10be8c6ae.txn new file mode 100644 index 0000000000000000000000000000000000000000..740a1c5001749c928704e826c40dec37638a048d Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/77-ce8a742b-eaea-48b8-b0d7-65d10be8c6ae.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/78-4b58b4b5-ac71-4ad6-888f-17d877a8a7c5.txn b/.lancedb/model2_fixed.lance/_transactions/78-4b58b4b5-ac71-4ad6-888f-17d877a8a7c5.txn new file mode 100644 index 0000000000000000000000000000000000000000..9d729ea49fa45046195f97506dbfecee23f8686f Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/78-4b58b4b5-ac71-4ad6-888f-17d877a8a7c5.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/79-701437d3-a483-4587-8105-8947216695ae.txn b/.lancedb/model2_fixed.lance/_transactions/79-701437d3-a483-4587-8105-8947216695ae.txn new file mode 100644 index 0000000000000000000000000000000000000000..33d687a71d682d6e7d69bdc0bf39746e668ad827 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/79-701437d3-a483-4587-8105-8947216695ae.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/8-e038279b-3b85-4ad4-8225-cf3d057ce122.txn b/.lancedb/model2_fixed.lance/_transactions/8-e038279b-3b85-4ad4-8225-cf3d057ce122.txn new file mode 100644 index 0000000000000000000000000000000000000000..283fdfe967f94ed26297ccb6d1a2166edf53a676 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/8-e038279b-3b85-4ad4-8225-cf3d057ce122.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/80-26fee38c-9612-4fbb-837f-c35cc618c27a.txn b/.lancedb/model2_fixed.lance/_transactions/80-26fee38c-9612-4fbb-837f-c35cc618c27a.txn new file mode 100644 index 0000000000000000000000000000000000000000..9f5446a51d08beac2dcf2c7165272db2a28d1bc4 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/80-26fee38c-9612-4fbb-837f-c35cc618c27a.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/81-1677e1fa-846c-4c79-9fcb-2ca29d486173.txn b/.lancedb/model2_fixed.lance/_transactions/81-1677e1fa-846c-4c79-9fcb-2ca29d486173.txn new file mode 100644 index 0000000000000000000000000000000000000000..53e453e2fe4286c025e7f44e5d3db1916b45dbe6 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/81-1677e1fa-846c-4c79-9fcb-2ca29d486173.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/82-0f10a87f-9f6d-4655-a91e-7b348c9eeb41.txn b/.lancedb/model2_fixed.lance/_transactions/82-0f10a87f-9f6d-4655-a91e-7b348c9eeb41.txn new file mode 100644 index 0000000000000000000000000000000000000000..0a8f5f46fdc908eb1ea31be9f2d0be0e3505e798 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/82-0f10a87f-9f6d-4655-a91e-7b348c9eeb41.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/83-459dd684-ab5a-4eb4-a8c3-a496d08e237d.txn b/.lancedb/model2_fixed.lance/_transactions/83-459dd684-ab5a-4eb4-a8c3-a496d08e237d.txn new file mode 100644 index 0000000000000000000000000000000000000000..674002ba9ba0441fa4ee481f96f209255ddab1f8 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/83-459dd684-ab5a-4eb4-a8c3-a496d08e237d.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/84-59be1f3e-2ee3-4d7d-bc0e-e67c98051c60.txn b/.lancedb/model2_fixed.lance/_transactions/84-59be1f3e-2ee3-4d7d-bc0e-e67c98051c60.txn new file mode 100644 index 0000000000000000000000000000000000000000..8e11d17cbde5b3f0abe28195e78b567352761993 Binary 
files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/84-59be1f3e-2ee3-4d7d-bc0e-e67c98051c60.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/85-8826bf81-5bb3-4e52-a428-2e6b653bae23.txn b/.lancedb/model2_fixed.lance/_transactions/85-8826bf81-5bb3-4e52-a428-2e6b653bae23.txn new file mode 100644 index 0000000000000000000000000000000000000000..3c8481b4a14dc51ce447e7b875cfe3d78205effb Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/85-8826bf81-5bb3-4e52-a428-2e6b653bae23.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/86-9db8fba0-bc56-4927-9b37-04d185ffc43f.txn b/.lancedb/model2_fixed.lance/_transactions/86-9db8fba0-bc56-4927-9b37-04d185ffc43f.txn new file mode 100644 index 0000000000000000000000000000000000000000..9356b8b10e33e9327a21df05a8b9a2ab67be9e7a Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/86-9db8fba0-bc56-4927-9b37-04d185ffc43f.txn differ diff --git a/.lancedb/model2_fixed.lance/_transactions/9-f332414c-a0c1-40fc-aa25-998e15aab59b.txn b/.lancedb/model2_fixed.lance/_transactions/9-f332414c-a0c1-40fc-aa25-998e15aab59b.txn new file mode 100644 index 0000000000000000000000000000000000000000..6c60f376007183a116b67be8c97e992d9d8c3104 Binary files /dev/null and b/.lancedb/model2_fixed.lance/_transactions/9-f332414c-a0c1-40fc-aa25-998e15aab59b.txn differ diff --git a/.lancedb/model2_fixed.lance/data/01cef012-f105-4a8b-a1fb-752faf8f42d4.lance b/.lancedb/model2_fixed.lance/data/01cef012-f105-4a8b-a1fb-752faf8f42d4.lance new file mode 100644 index 0000000000000000000000000000000000000000..5b72839ed92250e3ce659efaf7cd3e9ec723f716 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/01cef012-f105-4a8b-a1fb-752faf8f42d4.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:461e8b0fb1eeb6c20e7998fe84943f9d82c05b6c3ad14e2e0ad80b4b0d2f7e3d +size 169608 diff --git a/.lancedb/model2_fixed.lance/data/044f0ec8-0fe1-4125-936d-58a80f5d5420.lance b/.lancedb/model2_fixed.lance/data/044f0ec8-0fe1-4125-936d-58a80f5d5420.lance new file mode 100644 index 0000000000000000000000000000000000000000..9af1e31c51801a340e7f835402525e47f7b9ab76 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/044f0ec8-0fe1-4125-936d-58a80f5d5420.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8845972f005504a9eb32d24cfb74d911eb5a4a5ee077da80a4abc1a2e909ed6 +size 160372 diff --git a/.lancedb/model2_fixed.lance/data/05473457-76c8-4b85-b96c-561dc2ea15b1.lance b/.lancedb/model2_fixed.lance/data/05473457-76c8-4b85-b96c-561dc2ea15b1.lance new file mode 100644 index 0000000000000000000000000000000000000000..f2d880c64579900584a39986c72fd9f403592d70 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/05473457-76c8-4b85-b96c-561dc2ea15b1.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dfa189ad631f2d42454b93b7016070158c9cfd0565b8903541e0ef9ba2f2224 +size 160436 diff --git a/.lancedb/model2_fixed.lance/data/0751f89b-3208-4faf-a7ac-0b7b6c65a162.lance b/.lancedb/model2_fixed.lance/data/0751f89b-3208-4faf-a7ac-0b7b6c65a162.lance new file mode 100644 index 0000000000000000000000000000000000000000..790e45b7d361bf5d170a7ca61b3104077d2309b1 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/0751f89b-3208-4faf-a7ac-0b7b6c65a162.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb6a3990ca364d639441b33c6160046376d6cd46ae86ad39e0b6c4ca14d71c75 +size 46867 diff --git a/.lancedb/model2_fixed.lance/data/0e78e35f-062d-4c93-bc95-6b7674b2d591.lance 
b/.lancedb/model2_fixed.lance/data/0e78e35f-062d-4c93-bc95-6b7674b2d591.lance new file mode 100644 index 0000000000000000000000000000000000000000..624da12218891fd6618bb371daf90d0178fb7137 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/0e78e35f-062d-4c93-bc95-6b7674b2d591.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbc1789abdb2862ab8b381f0550299fa0ae3f8a6a35c90af49837a619584539c +size 158917 diff --git a/.lancedb/model2_fixed.lance/data/10d6eb55-219c-4d13-9871-ef67e1e70227.lance b/.lancedb/model2_fixed.lance/data/10d6eb55-219c-4d13-9871-ef67e1e70227.lance new file mode 100644 index 0000000000000000000000000000000000000000..86f7077c1d9f63cd5af99399d56d0b1e2fb86b89 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/10d6eb55-219c-4d13-9871-ef67e1e70227.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e96549ba0d89eb0241e2c3ab226e7d2c552b58662697aabd6279f7929a716ec +size 162947 diff --git a/.lancedb/model2_fixed.lance/data/12d0b8d6-3f91-4591-903f-976ea16bc33e.lance b/.lancedb/model2_fixed.lance/data/12d0b8d6-3f91-4591-903f-976ea16bc33e.lance new file mode 100644 index 0000000000000000000000000000000000000000..67cfacaaca8556ddf5874f4abd1e91672ea7c134 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/12d0b8d6-3f91-4591-903f-976ea16bc33e.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed98c74774af92cb14d4ab0f30f6a9784423ec7d94f05f1dd315b599487c0742 +size 157649 diff --git a/.lancedb/model2_fixed.lance/data/183e2eef-3f64-42fd-8632-e2567091d076.lance b/.lancedb/model2_fixed.lance/data/183e2eef-3f64-42fd-8632-e2567091d076.lance new file mode 100644 index 0000000000000000000000000000000000000000..a078027fc3e07c82b25a94f6edd3f08014f40cc7 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/183e2eef-3f64-42fd-8632-e2567091d076.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9410d1b5448e6933dd36e5dcd17ba13c98831e144c14ea0021b9672d6eb006f4 +size 158196 diff --git a/.lancedb/model2_fixed.lance/data/18f4e862-8d9b-411c-b7e4-ec6f26bb40a7.lance b/.lancedb/model2_fixed.lance/data/18f4e862-8d9b-411c-b7e4-ec6f26bb40a7.lance new file mode 100644 index 0000000000000000000000000000000000000000..18e149a7ac6c1e17efa9671e12a1d7fe96c01218 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/18f4e862-8d9b-411c-b7e4-ec6f26bb40a7.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed0fa435c960ba26e24a8747c8e7393acaa5a2a860d45bc2bbe0e9736b9e82ec +size 161702 diff --git a/.lancedb/model2_fixed.lance/data/224f1dab-0bf8-43c7-ba0d-ac8765bc2e4b.lance b/.lancedb/model2_fixed.lance/data/224f1dab-0bf8-43c7-ba0d-ac8765bc2e4b.lance new file mode 100644 index 0000000000000000000000000000000000000000..81482c073ba9cd03e706ea16300f28be5c92e4f3 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/224f1dab-0bf8-43c7-ba0d-ac8765bc2e4b.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04ec89b104527cdd58cc007375d7e31eddfc057a4b9088c77b33bcfb334bd01d +size 165093 diff --git a/.lancedb/model2_fixed.lance/data/22f740bf-34d2-448e-9052-5b00a23e5c9c.lance b/.lancedb/model2_fixed.lance/data/22f740bf-34d2-448e-9052-5b00a23e5c9c.lance new file mode 100644 index 0000000000000000000000000000000000000000..7963ece1494131e1f6b952a4e5b7ee9320d4ed30 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/22f740bf-34d2-448e-9052-5b00a23e5c9c.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8dc77adb904cd5c9703758d3a911860572d28808459290e3a8e2a434d832e456 +size 164259 diff --git a/.lancedb/model2_fixed.lance/data/2423771b-94d2-4c85-8ba1-14dd885af074.lance b/.lancedb/model2_fixed.lance/data/2423771b-94d2-4c85-8ba1-14dd885af074.lance new file mode 100644 index 0000000000000000000000000000000000000000..85a5820fd59e414bff40a344f13c4e9d9e0c11d4 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/2423771b-94d2-4c85-8ba1-14dd885af074.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85dfcb96b5c5382ee79f871f0e2ad4874fa8c7a1f65dd8e4b8552777a7cec7d9 +size 168966 diff --git a/.lancedb/model2_fixed.lance/data/28c79a56-d307-4aa0-a2f9-59a1c1b9c144.lance b/.lancedb/model2_fixed.lance/data/28c79a56-d307-4aa0-a2f9-59a1c1b9c144.lance new file mode 100644 index 0000000000000000000000000000000000000000..303947f8a77ba4c7c3915a09a8fe19139861c897 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/28c79a56-d307-4aa0-a2f9-59a1c1b9c144.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b272087c36996461ca33d65f53a58ee5e9eba50042f869bddab5940ee24902e6 +size 165231 diff --git a/.lancedb/model2_fixed.lance/data/2a5f5543-8015-4c07-8d2b-142a6c5bf181.lance b/.lancedb/model2_fixed.lance/data/2a5f5543-8015-4c07-8d2b-142a6c5bf181.lance new file mode 100644 index 0000000000000000000000000000000000000000..752f456cea9b415e6f1a285ed351e70acb363c13 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/2a5f5543-8015-4c07-8d2b-142a6c5bf181.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a365cfdd81af8213d10cf222481e507fe0c196ff1389c22e6f6431867902d507 +size 162482 diff --git a/.lancedb/model2_fixed.lance/data/2cb28a5a-f1bf-4488-a45d-a7837bfdb5a5.lance b/.lancedb/model2_fixed.lance/data/2cb28a5a-f1bf-4488-a45d-a7837bfdb5a5.lance new file mode 100644 index 0000000000000000000000000000000000000000..aa7dc6185309747af73e333f6264b71cb7e32fc5 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/2cb28a5a-f1bf-4488-a45d-a7837bfdb5a5.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5c4e16c2c7f3e84b88b4d4fce1789403efd862abc417083d8e419875cc7ca9c +size 164369 diff --git a/.lancedb/model2_fixed.lance/data/37cec7ca-48f2-4d68-b11c-306ca7aed2c5.lance b/.lancedb/model2_fixed.lance/data/37cec7ca-48f2-4d68-b11c-306ca7aed2c5.lance new file mode 100644 index 0000000000000000000000000000000000000000..78f3a4f9965d38e55bf418598a779032baa9e4e5 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/37cec7ca-48f2-4d68-b11c-306ca7aed2c5.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76d5332cf3c45a935465a0a5849af91b7ed1c179caadb9327faee2f3ac9f5a81 +size 164893 diff --git a/.lancedb/model2_fixed.lance/data/3a5552e3-1e5f-48ae-ba48-afc4f8e27ec1.lance b/.lancedb/model2_fixed.lance/data/3a5552e3-1e5f-48ae-ba48-afc4f8e27ec1.lance new file mode 100644 index 0000000000000000000000000000000000000000..e1dde1952116267518f0b4bbfc8877dc060eda13 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/3a5552e3-1e5f-48ae-ba48-afc4f8e27ec1.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:726a435aee8fbcaaaa293d6064cd716d7e1b70a28bccb899fd7fc64ae80de88f +size 169060 diff --git a/.lancedb/model2_fixed.lance/data/3ef67558-4cee-4b0a-af90-c6378f589233.lance b/.lancedb/model2_fixed.lance/data/3ef67558-4cee-4b0a-af90-c6378f589233.lance new file mode 100644 index 0000000000000000000000000000000000000000..584042c0c49fd3092daba6e00c269652796258f4 --- /dev/null +++ 
b/.lancedb/model2_fixed.lance/data/3ef67558-4cee-4b0a-af90-c6378f589233.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:893dfd80f81a23b235c357916bc8620d3dad166456b2288198fdf3154c15cbfe +size 166026 diff --git a/.lancedb/model2_fixed.lance/data/40625c41-aa73-4576-914e-10b386bd8508.lance b/.lancedb/model2_fixed.lance/data/40625c41-aa73-4576-914e-10b386bd8508.lance new file mode 100644 index 0000000000000000000000000000000000000000..e166eead58108d54b8a5ed0942ddc8f5e22412e8 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/40625c41-aa73-4576-914e-10b386bd8508.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ac77e413b9dcf5a8cf5e86ba1e1dabb1a49f35659a3997ce9315b7ca1368906 +size 155990 diff --git a/.lancedb/model2_fixed.lance/data/4092ea21-35ff-406f-9353-815d971b4c90.lance b/.lancedb/model2_fixed.lance/data/4092ea21-35ff-406f-9353-815d971b4c90.lance new file mode 100644 index 0000000000000000000000000000000000000000..d2cf005ea6b7bcbd6c971dabfbd0da68c5d325ec --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/4092ea21-35ff-406f-9353-815d971b4c90.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93f61411902468fbc54a42e2e58c267ebe10464e734368992f11cb05716b3f7f +size 162078 diff --git a/.lancedb/model2_fixed.lance/data/41bc08b4-0434-490c-a2d5-1c66cf2d06a7.lance b/.lancedb/model2_fixed.lance/data/41bc08b4-0434-490c-a2d5-1c66cf2d06a7.lance new file mode 100644 index 0000000000000000000000000000000000000000..4aea7cf8a1f0acd28813784ad5d1f0994f4f4145 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/41bc08b4-0434-490c-a2d5-1c66cf2d06a7.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5bba337996a1ccb2541ac558a56015266f40950d9024a5c64414b554937c821 +size 165368 diff --git a/.lancedb/model2_fixed.lance/data/47c6509f-c1cf-4fc8-a3f6-3327d328ffe7.lance b/.lancedb/model2_fixed.lance/data/47c6509f-c1cf-4fc8-a3f6-3327d328ffe7.lance new file mode 100644 index 0000000000000000000000000000000000000000..7803ec365335ad5e7e49dff28be0084900f9880d --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/47c6509f-c1cf-4fc8-a3f6-3327d328ffe7.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fee3d252bd1d62a812cadd2642a37a24367f8c16ed977f04bc386e7f74d9cc26 +size 163396 diff --git a/.lancedb/model2_fixed.lance/data/4bd9c8eb-1134-46e6-b614-926b81548bf6.lance b/.lancedb/model2_fixed.lance/data/4bd9c8eb-1134-46e6-b614-926b81548bf6.lance new file mode 100644 index 0000000000000000000000000000000000000000..00809a762ae50aa9bf4f43ec8f47fbf671fbf773 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/4bd9c8eb-1134-46e6-b614-926b81548bf6.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8171afb5850429e0161de64dbf7beb2569e924fc3e52d3016dc7ac7cf514d91c +size 159633 diff --git a/.lancedb/model2_fixed.lance/data/4deb4ae6-f954-445e-bc88-f1fc185a3d15.lance b/.lancedb/model2_fixed.lance/data/4deb4ae6-f954-445e-bc88-f1fc185a3d15.lance new file mode 100644 index 0000000000000000000000000000000000000000..044cc48a0fff448ef0abfd74aba215b93f4291ec --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/4deb4ae6-f954-445e-bc88-f1fc185a3d15.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60585b6d533c31284562ec0edf5036d2aa25b1792a12407b3e3fbecb068de29a +size 162618 diff --git a/.lancedb/model2_fixed.lance/data/4e14363b-eaaa-48a1-9c61-97a43eca9759.lance b/.lancedb/model2_fixed.lance/data/4e14363b-eaaa-48a1-9c61-97a43eca9759.lance new file mode 100644 
index 0000000000000000000000000000000000000000..df66c142f7d246cb2b8b74cdcf3e7a0e29d6dce1 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/4e14363b-eaaa-48a1-9c61-97a43eca9759.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26b6471eaf6c1864e92bbe81c798d55e371bbb64cb87f857fac353e56ba2e794 +size 161008 diff --git a/.lancedb/model2_fixed.lance/data/4f4c9813-e7ea-48fc-b42f-cf3d5de01e0c.lance b/.lancedb/model2_fixed.lance/data/4f4c9813-e7ea-48fc-b42f-cf3d5de01e0c.lance new file mode 100644 index 0000000000000000000000000000000000000000..c9fc9bfe02607f5d89dee3527a0185ab92367a4e --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/4f4c9813-e7ea-48fc-b42f-cf3d5de01e0c.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2f4718bf651008798c3fde9d85b0141ed03658dae793a3cdf27b0b22f51ad98 +size 156767 diff --git a/.lancedb/model2_fixed.lance/data/50100a27-245e-4296-80f3-dcec414ae28f.lance b/.lancedb/model2_fixed.lance/data/50100a27-245e-4296-80f3-dcec414ae28f.lance new file mode 100644 index 0000000000000000000000000000000000000000..0faa01f28541e05eb081065b4283d683b95d31db --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/50100a27-245e-4296-80f3-dcec414ae28f.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aed01ca0a58595e0a7cd7597d4fe5b2b8d8edfc9b0fda19e5ab446f814cf1897 +size 168482 diff --git a/.lancedb/model2_fixed.lance/data/5045d097-13ef-40f3-b720-d7751d158876.lance b/.lancedb/model2_fixed.lance/data/5045d097-13ef-40f3-b720-d7751d158876.lance new file mode 100644 index 0000000000000000000000000000000000000000..2a093d737330510d23cfd685f5b13081b2bad797 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/5045d097-13ef-40f3-b720-d7751d158876.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecbcb43311bfa45f08917b32a7108b6b9e1084b65a41f32b866bd306b6555013 +size 164232 diff --git a/.lancedb/model2_fixed.lance/data/51df9218-33cb-4cb5-b602-421dfc04e464.lance b/.lancedb/model2_fixed.lance/data/51df9218-33cb-4cb5-b602-421dfc04e464.lance new file mode 100644 index 0000000000000000000000000000000000000000..083f3f15a34ae1f749c51a614ceb50326598e4b0 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/51df9218-33cb-4cb5-b602-421dfc04e464.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8decbcc2998c9026dc011d74e6fc30f7be664ffd823e5f936e9a822a63731786 +size 162087 diff --git a/.lancedb/model2_fixed.lance/data/530ebc9e-ec98-4826-ab07-aa42bdb2ebcc.lance b/.lancedb/model2_fixed.lance/data/530ebc9e-ec98-4826-ab07-aa42bdb2ebcc.lance new file mode 100644 index 0000000000000000000000000000000000000000..4eed1351f98c83d8478980c522e994383c9c9e59 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/530ebc9e-ec98-4826-ab07-aa42bdb2ebcc.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a72135ccb5e2f6bde4c88101b3689a3fa5ce8d61aec6fa78974a5db800a08eb +size 166399 diff --git a/.lancedb/model2_fixed.lance/data/5476c0c5-d10a-4362-b8d2-468454adc00b.lance b/.lancedb/model2_fixed.lance/data/5476c0c5-d10a-4362-b8d2-468454adc00b.lance new file mode 100644 index 0000000000000000000000000000000000000000..940c6ba69a809a67e48bc63fe7bd1325ef8eca62 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/5476c0c5-d10a-4362-b8d2-468454adc00b.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836bf5ca8e1a534f10126542e03080a9c4956205fc9ec45b3792361b4f080534 +size 159670 diff --git 
a/.lancedb/model2_fixed.lance/data/55fd1b61-2d53-47de-b69f-3da298662441.lance b/.lancedb/model2_fixed.lance/data/55fd1b61-2d53-47de-b69f-3da298662441.lance new file mode 100644 index 0000000000000000000000000000000000000000..890084866757de7276acb53af36190392ed35021 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/55fd1b61-2d53-47de-b69f-3da298662441.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4165498ea502d01d892176274b71116cd231998a695e474e5a08deca552874f +size 160783 diff --git a/.lancedb/model2_fixed.lance/data/573dd9e5-6b7d-4ad3-9d60-d9c7afb8b6b9.lance b/.lancedb/model2_fixed.lance/data/573dd9e5-6b7d-4ad3-9d60-d9c7afb8b6b9.lance new file mode 100644 index 0000000000000000000000000000000000000000..32f7f46e2acd18a454fb2ae90011bada090e93de --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/573dd9e5-6b7d-4ad3-9d60-d9c7afb8b6b9.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e99270e6f8b938a2f6f34beb03f41aa4af32d927dd01865965108d4d3f1be73 +size 158418 diff --git a/.lancedb/model2_fixed.lance/data/5c94eff1-005e-4fe9-a40a-17aed2d79946.lance b/.lancedb/model2_fixed.lance/data/5c94eff1-005e-4fe9-a40a-17aed2d79946.lance new file mode 100644 index 0000000000000000000000000000000000000000..ae752d5a6d51b97e429104f76a201332d8c464d2 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/5c94eff1-005e-4fe9-a40a-17aed2d79946.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bb6d32de2d8f2da6a3d1f50d171a8aae305546fc00989996d9f0856c3e385cf +size 166833 diff --git a/.lancedb/model2_fixed.lance/data/5f0f288c-58dc-46d5-8962-082795c602e8.lance b/.lancedb/model2_fixed.lance/data/5f0f288c-58dc-46d5-8962-082795c602e8.lance new file mode 100644 index 0000000000000000000000000000000000000000..3219cfc02e209f8f1a528fbf80df3a0b32b699f6 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/5f0f288c-58dc-46d5-8962-082795c602e8.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8fe307bb7c1208db629a96fc2bd6223e196a8eb09f3cfee4df6052f78708571 +size 163998 diff --git a/.lancedb/model2_fixed.lance/data/6634c95d-e5d0-44f9-8cc2-857d2d82e6d6.lance b/.lancedb/model2_fixed.lance/data/6634c95d-e5d0-44f9-8cc2-857d2d82e6d6.lance new file mode 100644 index 0000000000000000000000000000000000000000..44f13a5b17ff0daaf089f44b97493ef7d47e181c --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/6634c95d-e5d0-44f9-8cc2-857d2d82e6d6.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21c1fbd02d70a9dbc465fc78636a24bafab99f7704a5b56e760e1e7b1fb4e100 +size 164639 diff --git a/.lancedb/model2_fixed.lance/data/67a6f288-2196-4c0d-ab64-b05395fac0dd.lance b/.lancedb/model2_fixed.lance/data/67a6f288-2196-4c0d-ab64-b05395fac0dd.lance new file mode 100644 index 0000000000000000000000000000000000000000..2d8d08760f91c59968bff7e5ab232899c1c7a8b4 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/67a6f288-2196-4c0d-ab64-b05395fac0dd.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19227ece30d14658b3f4fa9eda492c2c37f751dff4ec4f2a29c48aa326a1f132 +size 167982 diff --git a/.lancedb/model2_fixed.lance/data/6b7b6410-5088-4e94-ba9f-ebd7804d40c5.lance b/.lancedb/model2_fixed.lance/data/6b7b6410-5088-4e94-ba9f-ebd7804d40c5.lance new file mode 100644 index 0000000000000000000000000000000000000000..09ee474c6e40d1906e73d51997d4393d0cadcbbf --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/6b7b6410-5088-4e94-ba9f-ebd7804d40c5.lance @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:92ff36ac15ef7feeaf4f774185bc244193d7483b5322efe809cae60917988f75 +size 167499 diff --git a/.lancedb/model2_fixed.lance/data/6f96a677-c7f2-4c54-b87a-9cce48311f54.lance b/.lancedb/model2_fixed.lance/data/6f96a677-c7f2-4c54-b87a-9cce48311f54.lance new file mode 100644 index 0000000000000000000000000000000000000000..38db920e9773b6ed01183162b0f6f6175eddbc55 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/6f96a677-c7f2-4c54-b87a-9cce48311f54.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93556d86959c0307bbb3651c7ccbfd50114b9fb9c2df32b63c86b22fb387b5d8 +size 167142 diff --git a/.lancedb/model2_fixed.lance/data/7034efda-baf6-490b-9bad-b4b3718e8107.lance b/.lancedb/model2_fixed.lance/data/7034efda-baf6-490b-9bad-b4b3718e8107.lance new file mode 100644 index 0000000000000000000000000000000000000000..d098edfc2907618b8a35f45c11257c474ba01560 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/7034efda-baf6-490b-9bad-b4b3718e8107.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64489b281036634c15944819d8f91f9852fa4178ffd5395d88e38ad37cdd4811 +size 160283 diff --git a/.lancedb/model2_fixed.lance/data/7390a8fd-17ce-4a31-8578-1fd31d95abb8.lance b/.lancedb/model2_fixed.lance/data/7390a8fd-17ce-4a31-8578-1fd31d95abb8.lance new file mode 100644 index 0000000000000000000000000000000000000000..8aa20c7f931731bc5d02bad2bfbc795e6fc5be93 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/7390a8fd-17ce-4a31-8578-1fd31d95abb8.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a2b8e73a00fc6ef490ea793c717647ed721e6c6a59b57632ce4d7937061d09d +size 167618 diff --git a/.lancedb/model2_fixed.lance/data/739babcf-a885-40f8-8a80-898d1363cc08.lance b/.lancedb/model2_fixed.lance/data/739babcf-a885-40f8-8a80-898d1363cc08.lance new file mode 100644 index 0000000000000000000000000000000000000000..c2cc8ba78c787729a0bf40df09040f8d8603d823 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/739babcf-a885-40f8-8a80-898d1363cc08.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4b5c9ee06230436630dafb553c9acede7bfff1b56920c8d28771265962813cb +size 176083 diff --git a/.lancedb/model2_fixed.lance/data/74894780-4f9e-4832-a4c8-1b479f1a26f3.lance b/.lancedb/model2_fixed.lance/data/74894780-4f9e-4832-a4c8-1b479f1a26f3.lance new file mode 100644 index 0000000000000000000000000000000000000000..c55d5a675b1404054f78c36fce8552448f3c5b95 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/74894780-4f9e-4832-a4c8-1b479f1a26f3.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9aedb21f6e855ac8fd12b4e1caad24223c65e8e0db22826c277998c7c0298fb5 +size 162930 diff --git a/.lancedb/model2_fixed.lance/data/7776ce94-b062-43bb-8a0b-18717f5edab4.lance b/.lancedb/model2_fixed.lance/data/7776ce94-b062-43bb-8a0b-18717f5edab4.lance new file mode 100644 index 0000000000000000000000000000000000000000..ebaffded3f7378974d0ae20c723f1bf82d8a0abd --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/7776ce94-b062-43bb-8a0b-18717f5edab4.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26c2d38d68f58d97135c023cc609a3079531d046c58de6beb5df4c1f683786a6 +size 161322 diff --git a/.lancedb/model2_fixed.lance/data/7bfb05ea-a973-4c8c-a645-08a76d1539e8.lance b/.lancedb/model2_fixed.lance/data/7bfb05ea-a973-4c8c-a645-08a76d1539e8.lance new file mode 100644 index 0000000000000000000000000000000000000000..4ea56cb7dadc6d507021ac10cf510db2e1a9d1d7 --- /dev/null 
+++ b/.lancedb/model2_fixed.lance/data/7bfb05ea-a973-4c8c-a645-08a76d1539e8.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:345a21fb59d4b28947997eee342f34a11f1f020d3ee8349502c5960da2bb805f +size 163614 diff --git a/.lancedb/model2_fixed.lance/data/7fe1c43e-867b-44b2-a017-58bf0472a2fd.lance b/.lancedb/model2_fixed.lance/data/7fe1c43e-867b-44b2-a017-58bf0472a2fd.lance new file mode 100644 index 0000000000000000000000000000000000000000..14844fcd7eff7e4b9b912aef2c949b73356b23f8 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/7fe1c43e-867b-44b2-a017-58bf0472a2fd.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:203b46d0e6cd90f61b24f9ea95b5729b4c4a0256b722336da586ef20f97fe27e +size 158079 diff --git a/.lancedb/model2_fixed.lance/data/83b4f65b-f860-49b6-a077-4728243530f7.lance b/.lancedb/model2_fixed.lance/data/83b4f65b-f860-49b6-a077-4728243530f7.lance new file mode 100644 index 0000000000000000000000000000000000000000..00096130a5eb71bd3c03ed5015af9365f3cd7906 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/83b4f65b-f860-49b6-a077-4728243530f7.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f894e5491c65154656bb29d247791e9ae5eeec48f37c4041f59e52808ac378ad +size 167885 diff --git a/.lancedb/model2_fixed.lance/data/83cf507f-ee59-4cf6-8e9c-71a3562543b7.lance b/.lancedb/model2_fixed.lance/data/83cf507f-ee59-4cf6-8e9c-71a3562543b7.lance new file mode 100644 index 0000000000000000000000000000000000000000..5398cc06127955c7ba7e3dfc3bedb2b4ddf7bc63 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/83cf507f-ee59-4cf6-8e9c-71a3562543b7.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dbd939048ccf7c353f1a2dad992ea95b25899ae99d6cd4b0ec5da5dfc30c87f +size 158396 diff --git a/.lancedb/model2_fixed.lance/data/8514c953-6f28-47f0-9199-c3b2956d9d7f.lance b/.lancedb/model2_fixed.lance/data/8514c953-6f28-47f0-9199-c3b2956d9d7f.lance new file mode 100644 index 0000000000000000000000000000000000000000..243bcacbc4ca98961778b1966751dac6cf83790e --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/8514c953-6f28-47f0-9199-c3b2956d9d7f.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:130f27945561ac1847368246cce89230e2b64dc41e35f10d243d8da49c619edd +size 157477 diff --git a/.lancedb/model2_fixed.lance/data/89ce1258-bea8-42b4-9271-2f41bb5461b5.lance b/.lancedb/model2_fixed.lance/data/89ce1258-bea8-42b4-9271-2f41bb5461b5.lance new file mode 100644 index 0000000000000000000000000000000000000000..fc1be0ce4c8448c4e23ecac5a01ae5a54858a6e6 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/89ce1258-bea8-42b4-9271-2f41bb5461b5.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35b3b518762f3b71cfaf9248db4166569affc39880ce96359ac79279a522c8a2 +size 164381 diff --git a/.lancedb/model2_fixed.lance/data/8b40e7b8-443d-4ae3-992c-0dec68973ce4.lance b/.lancedb/model2_fixed.lance/data/8b40e7b8-443d-4ae3-992c-0dec68973ce4.lance new file mode 100644 index 0000000000000000000000000000000000000000..4fe1276fbdc298c4307d551960d2e68e38464582 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/8b40e7b8-443d-4ae3-992c-0dec68973ce4.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30bdd2dc08abdf18e244a0adf3c49dbe63c3d6457355bd282244dead36977904 +size 163290 diff --git a/.lancedb/model2_fixed.lance/data/93db4b58-36cd-4ffe-aa78-9ca8ebf91894.lance b/.lancedb/model2_fixed.lance/data/93db4b58-36cd-4ffe-aa78-9ca8ebf91894.lance new file mode 
100644 index 0000000000000000000000000000000000000000..a361b0c63933929311518f222302b1aa431c8e01 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/93db4b58-36cd-4ffe-aa78-9ca8ebf91894.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d16aa7e174dca6077c8fde57ad53710fc5b0e18b442f3e7fd443a8e58b4cf5e3 +size 163355 diff --git a/.lancedb/model2_fixed.lance/data/9a98bd97-5bc6-407f-b5f7-6e5fdcb5188b.lance b/.lancedb/model2_fixed.lance/data/9a98bd97-5bc6-407f-b5f7-6e5fdcb5188b.lance new file mode 100644 index 0000000000000000000000000000000000000000..e5f953511339b9ab97cbd4b3f6b84cdb5ed52d1b --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/9a98bd97-5bc6-407f-b5f7-6e5fdcb5188b.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdfa2372c33f7756f086cd044b32efeebce1aee67925b385fa9a2419d59ee48c +size 168003 diff --git a/.lancedb/model2_fixed.lance/data/9f87e728-0e77-4496-a94a-b7d30c09ea1d.lance b/.lancedb/model2_fixed.lance/data/9f87e728-0e77-4496-a94a-b7d30c09ea1d.lance new file mode 100644 index 0000000000000000000000000000000000000000..41a37327e2e5b9c7d267a396eba45425e9528670 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/9f87e728-0e77-4496-a94a-b7d30c09ea1d.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c37cc55db50b83a742f58036f8f6d6ee4a62dd66d812723cd04d684e7508425 +size 161586 diff --git a/.lancedb/model2_fixed.lance/data/a1d39316-a11b-4448-b1e6-901c3de5b2a7.lance b/.lancedb/model2_fixed.lance/data/a1d39316-a11b-4448-b1e6-901c3de5b2a7.lance new file mode 100644 index 0000000000000000000000000000000000000000..28fdcfd5abf97683fab98b467ef6b120950129cd --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/a1d39316-a11b-4448-b1e6-901c3de5b2a7.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d60b751a37f5284806a6552215b50a1959f18545cb0e9fad3b134d1064ed520 +size 161447 diff --git a/.lancedb/model2_fixed.lance/data/a1fb2f57-3302-4ee4-96d3-7bf0a1bae7d0.lance b/.lancedb/model2_fixed.lance/data/a1fb2f57-3302-4ee4-96d3-7bf0a1bae7d0.lance new file mode 100644 index 0000000000000000000000000000000000000000..ac062e00b739bedadbdde5986fef4412fdfd93e3 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/a1fb2f57-3302-4ee4-96d3-7bf0a1bae7d0.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9680345eea6d20d72bcc2ba326d2a1a04f2d09626d159b18145ca759dde7e0b +size 156677 diff --git a/.lancedb/model2_fixed.lance/data/a2a1b901-7e1c-4dc6-836c-e5dff366732b.lance b/.lancedb/model2_fixed.lance/data/a2a1b901-7e1c-4dc6-836c-e5dff366732b.lance new file mode 100644 index 0000000000000000000000000000000000000000..463f659bf21ac07975a2384a909d307f5a5d41b8 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/a2a1b901-7e1c-4dc6-836c-e5dff366732b.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f861ae144608df2c09e4ed45ced91435e61731146de2596c35aab37b24912729 +size 164008 diff --git a/.lancedb/model2_fixed.lance/data/a4f9dcd8-d1d1-4c5d-8736-e34ac5133df3.lance b/.lancedb/model2_fixed.lance/data/a4f9dcd8-d1d1-4c5d-8736-e34ac5133df3.lance new file mode 100644 index 0000000000000000000000000000000000000000..cbbf7a2d3a2c89ac101cab629c91889e1ab34e20 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/a4f9dcd8-d1d1-4c5d-8736-e34ac5133df3.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8611c104d8cf8b82b518a031efcb562a894fb72bf6eb30d384df804d18d9e7e +size 164461 diff --git 
a/.lancedb/model2_fixed.lance/data/a613c46c-4ebd-475c-9d1c-71e5b1f6d11f.lance b/.lancedb/model2_fixed.lance/data/a613c46c-4ebd-475c-9d1c-71e5b1f6d11f.lance new file mode 100644 index 0000000000000000000000000000000000000000..a245c1a679c01a2a6cce885fe2050889653603e6 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/a613c46c-4ebd-475c-9d1c-71e5b1f6d11f.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:947e20dfd8b3c68cbf1cdc2da78d3610e2846b2dc2ee171490fe67e72df2b01b +size 163546 diff --git a/.lancedb/model2_fixed.lance/data/a93dc834-ce05-4cfa-952e-42f4625c1ed5.lance b/.lancedb/model2_fixed.lance/data/a93dc834-ce05-4cfa-952e-42f4625c1ed5.lance new file mode 100644 index 0000000000000000000000000000000000000000..78c30f53c9d631c52481941a0b00da77c6231484 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/a93dc834-ce05-4cfa-952e-42f4625c1ed5.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6766cc900db33276e849731d411e2ccee2c6800df9aad60ed0daa74fec76630 +size 159184 diff --git a/.lancedb/model2_fixed.lance/data/aa217f85-e61d-4bf6-8cf6-938e9ee1e250.lance b/.lancedb/model2_fixed.lance/data/aa217f85-e61d-4bf6-8cf6-938e9ee1e250.lance new file mode 100644 index 0000000000000000000000000000000000000000..a0dfb13c3b11dffc9d0d109945609427b3a52fb0 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/aa217f85-e61d-4bf6-8cf6-938e9ee1e250.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a48ed03433d143ee88a394f67b2f0619d6e4e894ed65be792a6563f4199cc9c5 +size 159745 diff --git a/.lancedb/model2_fixed.lance/data/ae6e0214-1f0d-4c4c-9845-cc6047395694.lance b/.lancedb/model2_fixed.lance/data/ae6e0214-1f0d-4c4c-9845-cc6047395694.lance new file mode 100644 index 0000000000000000000000000000000000000000..2e3aceb9ad8a8763a70fc3609a5d8f603c072da4 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/ae6e0214-1f0d-4c4c-9845-cc6047395694.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d18f73706e73315c6cc3080b406f982e227fff1bf9013af46689bb11163a6cc3 +size 168430 diff --git a/.lancedb/model2_fixed.lance/data/afe0dbb1-a5a5-41a0-91a5-18ed41e3a1d5.lance b/.lancedb/model2_fixed.lance/data/afe0dbb1-a5a5-41a0-91a5-18ed41e3a1d5.lance new file mode 100644 index 0000000000000000000000000000000000000000..99e20fd414a7441756ffd02b61a2df9321c83226 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/afe0dbb1-a5a5-41a0-91a5-18ed41e3a1d5.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea9533abcd958738c06dfb9d3ac45d27c073fdbb16705b9b7b3942bbfe09a9c4 +size 163626 diff --git a/.lancedb/model2_fixed.lance/data/b00a2d8a-7bd9-4082-86c6-33e2684ed94e.lance b/.lancedb/model2_fixed.lance/data/b00a2d8a-7bd9-4082-86c6-33e2684ed94e.lance new file mode 100644 index 0000000000000000000000000000000000000000..19d8420a0762211da159fa214c07d066da3f6819 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/b00a2d8a-7bd9-4082-86c6-33e2684ed94e.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29f67bd2820e324e5e94143878c0a23a884dc2f9e7ba91410f8a3cc8ef5e91e2 +size 158663 diff --git a/.lancedb/model2_fixed.lance/data/b0bb1a51-3731-4f1b-9b2d-bcff0a30dd39.lance b/.lancedb/model2_fixed.lance/data/b0bb1a51-3731-4f1b-9b2d-bcff0a30dd39.lance new file mode 100644 index 0000000000000000000000000000000000000000..ff0ca64d15fafad21bd0d335cecbcd1c152bed67 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/b0bb1a51-3731-4f1b-9b2d-bcff0a30dd39.lance @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:ab88f4ee3322062611bfb4aa3277678ede497638f06d325d0ea39555854bdaa2 +size 157484 diff --git a/.lancedb/model2_fixed.lance/data/b5142bf4-89c0-4a02-a97d-95e4613ec100.lance b/.lancedb/model2_fixed.lance/data/b5142bf4-89c0-4a02-a97d-95e4613ec100.lance new file mode 100644 index 0000000000000000000000000000000000000000..b7add66fd0f98bd33af52926df5d6e1062ed1bc6 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/b5142bf4-89c0-4a02-a97d-95e4613ec100.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eedc0dd9d383147701329c8141707d6fc5f22b7b7d59a7de2b2154445816ece +size 159984 diff --git a/.lancedb/model2_fixed.lance/data/bb22023a-8045-4c63-a6e4-4158e60e431e.lance b/.lancedb/model2_fixed.lance/data/bb22023a-8045-4c63-a6e4-4158e60e431e.lance new file mode 100644 index 0000000000000000000000000000000000000000..b9a330c2654372386b9236085f513be39f7f3d65 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/bb22023a-8045-4c63-a6e4-4158e60e431e.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b503f6a76a146346f1db0c880ab94e32c4ec7ea612a7f162643438c454d140f9 +size 157447 diff --git a/.lancedb/model2_fixed.lance/data/c335902f-698c-4c8e-a456-990ecaff1eba.lance b/.lancedb/model2_fixed.lance/data/c335902f-698c-4c8e-a456-990ecaff1eba.lance new file mode 100644 index 0000000000000000000000000000000000000000..1d7d9027291b21698c3a99e1946e788bc2808f13 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/c335902f-698c-4c8e-a456-990ecaff1eba.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21dd004b704e90e708b01c5e3d84c4502b9fb74a79e82fa7309f7665d09ebaac +size 159490 diff --git a/.lancedb/model2_fixed.lance/data/c8923668-31fa-4d20-b9d8-f70bc0d5c90b.lance b/.lancedb/model2_fixed.lance/data/c8923668-31fa-4d20-b9d8-f70bc0d5c90b.lance new file mode 100644 index 0000000000000000000000000000000000000000..1ad3f10d7edc0e063843273812b2b7ca4f71a317 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/c8923668-31fa-4d20-b9d8-f70bc0d5c90b.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3ddcc5a44c5c14dff4eea3b360321ad1ee6dddc4844a7ebca9982fa5101c1f8 +size 169727 diff --git a/.lancedb/model2_fixed.lance/data/c97a4969-8445-455d-8602-9a9409295866.lance b/.lancedb/model2_fixed.lance/data/c97a4969-8445-455d-8602-9a9409295866.lance new file mode 100644 index 0000000000000000000000000000000000000000..969cbebd25c484d3b2f44c2de81e6e2aed83f9d2 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/c97a4969-8445-455d-8602-9a9409295866.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6ccfb81a19a7d9dea5e3ea234e341c6ae71ba23fb52f39c235c0db26d12dfaa +size 156823 diff --git a/.lancedb/model2_fixed.lance/data/ce120fb4-bc1a-4b1e-a098-af04fff63951.lance b/.lancedb/model2_fixed.lance/data/ce120fb4-bc1a-4b1e-a098-af04fff63951.lance new file mode 100644 index 0000000000000000000000000000000000000000..29b1a1474b74d9848f7f7e2b8c5eff26908ff196 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/ce120fb4-bc1a-4b1e-a098-af04fff63951.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76ae44cbee7c9cf50ed87144a6ba043053aab7904de2c7af355077ba317b466f +size 159197 diff --git a/.lancedb/model2_fixed.lance/data/d250ca21-0ade-4f8d-9199-29b50bc93fb2.lance b/.lancedb/model2_fixed.lance/data/d250ca21-0ade-4f8d-9199-29b50bc93fb2.lance new file mode 100644 index 0000000000000000000000000000000000000000..82a94f2c46a72706045c7033bde6e80641ee8b34 --- /dev/null 
+++ b/.lancedb/model2_fixed.lance/data/d250ca21-0ade-4f8d-9199-29b50bc93fb2.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:933d7a932c908894917fbfec9e5a84dc906c61fa2f5f8a37fb02c500a93420a0 +size 182856 diff --git a/.lancedb/model2_fixed.lance/data/d2dde8a9-e90e-495a-bb72-691c99d81c6e.lance b/.lancedb/model2_fixed.lance/data/d2dde8a9-e90e-495a-bb72-691c99d81c6e.lance new file mode 100644 index 0000000000000000000000000000000000000000..f1677b4c26fb327bc7dae2661eb774571bfb506d --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/d2dde8a9-e90e-495a-bb72-691c99d81c6e.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5352b6bc405d7a2b3b1991d0df36eb833461ec3a0853ccc7bdf49f23ec295670 +size 163234 diff --git a/.lancedb/model2_fixed.lance/data/d9c91c31-3849-4951-aa22-736f1cc656eb.lance b/.lancedb/model2_fixed.lance/data/d9c91c31-3849-4951-aa22-736f1cc656eb.lance new file mode 100644 index 0000000000000000000000000000000000000000..cc8c198bda42cb1057300d45cdcefa7edf093892 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/d9c91c31-3849-4951-aa22-736f1cc656eb.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1500a2f88ba417fdb0025ab6cc51d1baf4270ed0fe837162bd9f7a9b5c2cd9f +size 155946 diff --git a/.lancedb/model2_fixed.lance/data/dd56eb1f-9151-45fc-af7b-d3969427b91a.lance b/.lancedb/model2_fixed.lance/data/dd56eb1f-9151-45fc-af7b-d3969427b91a.lance new file mode 100644 index 0000000000000000000000000000000000000000..0054dea8b61df027c4e05b55457ae58bc8b17218 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/dd56eb1f-9151-45fc-af7b-d3969427b91a.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3277a692b9252f53d22a0335f05726a90ff0c5e06fdc4eb7662f8ba9dc587cd +size 161449 diff --git a/.lancedb/model2_fixed.lance/data/e3ead8c2-5c25-47a6-a380-0a6c6d3f97eb.lance b/.lancedb/model2_fixed.lance/data/e3ead8c2-5c25-47a6-a380-0a6c6d3f97eb.lance new file mode 100644 index 0000000000000000000000000000000000000000..16ff9d36e66d9598616f798f7d6736623a7f3e02 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/e3ead8c2-5c25-47a6-a380-0a6c6d3f97eb.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7aa2a9e426f5c813d9a115e285ed34e1f20ec07f9a67bd77a27dd9add38af29 +size 163217 diff --git a/.lancedb/model2_fixed.lance/data/e7aa8fc2-c62f-4999-b6ac-839f1717ea7c.lance b/.lancedb/model2_fixed.lance/data/e7aa8fc2-c62f-4999-b6ac-839f1717ea7c.lance new file mode 100644 index 0000000000000000000000000000000000000000..211640acebe3708ca7a21cf3393c19672d09db54 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/e7aa8fc2-c62f-4999-b6ac-839f1717ea7c.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d62bf890b818329148355788074c1a8cc5a7fa629b09f5164ddbd70769566328 +size 163470 diff --git a/.lancedb/model2_fixed.lance/data/e9e19be1-ed24-46ec-a6ac-030e95eeabd1.lance b/.lancedb/model2_fixed.lance/data/e9e19be1-ed24-46ec-a6ac-030e95eeabd1.lance new file mode 100644 index 0000000000000000000000000000000000000000..2a87e6a30dc8042770fb0529359b5ae963c7589e --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/e9e19be1-ed24-46ec-a6ac-030e95eeabd1.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dccb8620c87f490b5444cb738ef1f168d242c8689bdb3d5681dcae11d7b10f98 +size 161202 diff --git a/.lancedb/model2_fixed.lance/data/f34074d8-65f7-48fe-b162-1fed368c9178.lance b/.lancedb/model2_fixed.lance/data/f34074d8-65f7-48fe-b162-1fed368c9178.lance new file mode 
100644 index 0000000000000000000000000000000000000000..19e527aa4f26a7dc6724abfcb1948154b4291982 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/f34074d8-65f7-48fe-b162-1fed368c9178.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:058f0e376196a8661a1295f6a2c35b7aa5d6e5f8b88d968b7dbf8aea523bb78c +size 168893 diff --git a/.lancedb/model2_fixed.lance/data/f5d8d4c3-c6b0-4aaf-a944-f4a825b039d9.lance b/.lancedb/model2_fixed.lance/data/f5d8d4c3-c6b0-4aaf-a944-f4a825b039d9.lance new file mode 100644 index 0000000000000000000000000000000000000000..db06ca6b4f4f5465e6deae75c6baf89aaa5f959f --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/f5d8d4c3-c6b0-4aaf-a944-f4a825b039d9.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:351047e8a88c21b81fc3f37948887ca79fbdddf9ec81e855f58fcb6214488f2b +size 169824 diff --git a/.lancedb/model2_fixed.lance/data/f86677a4-8dab-463a-a51d-93c1b7bc2c7a.lance b/.lancedb/model2_fixed.lance/data/f86677a4-8dab-463a-a51d-93c1b7bc2c7a.lance new file mode 100644 index 0000000000000000000000000000000000000000..3cdb1f4de8b65e3da1f54b9a33eff2df010f83a1 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/f86677a4-8dab-463a-a51d-93c1b7bc2c7a.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c6e0ea55159780d090061d3ed9504cc588393a4cf1f126d833cd0f494d6d6c1 +size 161641 diff --git a/.lancedb/model2_fixed.lance/data/f8e5e605-2bc9-4fa2-8216-110b6441847f.lance b/.lancedb/model2_fixed.lance/data/f8e5e605-2bc9-4fa2-8216-110b6441847f.lance new file mode 100644 index 0000000000000000000000000000000000000000..c30ff692ffa1b1e1cef32b8a45271481a7a55405 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/f8e5e605-2bc9-4fa2-8216-110b6441847f.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca15443a4a3f2fe679b230400c29fbda0e6527dfd6cb698b09a5a2a3e5c07fbf +size 161598 diff --git a/.lancedb/model2_fixed.lance/data/f9a4ce3c-be06-4251-9bc3-e59254f8691e.lance b/.lancedb/model2_fixed.lance/data/f9a4ce3c-be06-4251-9bc3-e59254f8691e.lance new file mode 100644 index 0000000000000000000000000000000000000000..42364527023891b8f4167de69ca1c5ad4d3152a7 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/f9a4ce3c-be06-4251-9bc3-e59254f8691e.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d646574cec39ca443cd2766b8e98a6ceb767ba661a81e5cdc05bd6970f47143 +size 162351 diff --git a/.lancedb/model2_fixed.lance/data/fb229c76-2591-4fe0-ad38-72c29d8e1353.lance b/.lancedb/model2_fixed.lance/data/fb229c76-2591-4fe0-ad38-72c29d8e1353.lance new file mode 100644 index 0000000000000000000000000000000000000000..f329113534e4f59b31b2ba17cc178619f5ef0fc3 --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/fb229c76-2591-4fe0-ad38-72c29d8e1353.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73917330e67f089c17cb87a124ebd0f50ba4606a24105339aabcd4332f556191 +size 160374 diff --git a/.lancedb/model2_fixed.lance/data/fbe9cf1b-6096-45ae-88ca-bc425a8ab633.lance b/.lancedb/model2_fixed.lance/data/fbe9cf1b-6096-45ae-88ca-bc425a8ab633.lance new file mode 100644 index 0000000000000000000000000000000000000000..2fa76372426ebef7d20b1127d292c0bbd4671dcb --- /dev/null +++ b/.lancedb/model2_fixed.lance/data/fbe9cf1b-6096-45ae-88ca-bc425a8ab633.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc9b1648c455562360063823051f3be883deca3da1261152e98e1f33c4310703 +size 160540 diff --git 
a/.lancedb/model3_fixed.lance/_indices/b875f5df-9571-488d-8e81-5844dd99faf9/index.idx b/.lancedb/model3_fixed.lance/_indices/b875f5df-9571-488d-8e81-5844dd99faf9/index.idx new file mode 100644 index 0000000000000000000000000000000000000000..5443ed35bb453ea3e5f2365418b20907f3b933a9 --- /dev/null +++ b/.lancedb/model3_fixed.lance/_indices/b875f5df-9571-488d-8e81-5844dd99faf9/index.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f66ee59d47b330f314fe1c2571373284c0f5c3050b0e4490b1d075ba7840a3d +size 3265152 diff --git a/.lancedb/model3_fixed.lance/_transactions/0-f9a2885d-bd7d-42ec-a710-609db4ff1eca.txn b/.lancedb/model3_fixed.lance/_transactions/0-f9a2885d-bd7d-42ec-a710-609db4ff1eca.txn new file mode 100644 index 0000000000000000000000000000000000000000..fc99939c8dcb2af00295a6812187bfb42d6f4c8e --- /dev/null +++ b/.lancedb/model3_fixed.lance/_transactions/0-f9a2885d-bd7d-42ec-a710-609db4ff1eca.txn @@ -0,0 +1 @@ +$f9a2885d-bd7d-42ec-a710-609db4ff1eca²V3vector ÿÿÿÿÿÿÿÿÿ*fixed_size_list:float:153608text ÿÿÿÿÿÿÿÿÿ*string08 \ No newline at end of file diff --git a/.lancedb/model3_fixed.lance/_transactions/1-229f1a48-40d8-4eeb-b256-a6941093f010.txn b/.lancedb/model3_fixed.lance/_transactions/1-229f1a48-40d8-4eeb-b256-a6941093f010.txn new file mode 100644 index 0000000000000000000000000000000000000000..13439fac3e64f5db24c548beb9d015f952a57d0a Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/1-229f1a48-40d8-4eeb-b256-a6941093f010.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/10-cd9304cf-8774-4904-b26a-d3386237f76a.txn b/.lancedb/model3_fixed.lance/_transactions/10-cd9304cf-8774-4904-b26a-d3386237f76a.txn new file mode 100644 index 0000000000000000000000000000000000000000..75a5b80baf29c38f3cc8e73d9ea27989dd11557f Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/10-cd9304cf-8774-4904-b26a-d3386237f76a.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/11-095db9cc-8ed7-44ce-ba8a-57b235ff5c8b.txn b/.lancedb/model3_fixed.lance/_transactions/11-095db9cc-8ed7-44ce-ba8a-57b235ff5c8b.txn new file mode 100644 index 0000000000000000000000000000000000000000..f19938af427e3bfd5a06533a51661157cd45b131 Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/11-095db9cc-8ed7-44ce-ba8a-57b235ff5c8b.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/12-f26e7ed5-d106-42d3-82ca-4ba3b488812e.txn b/.lancedb/model3_fixed.lance/_transactions/12-f26e7ed5-d106-42d3-82ca-4ba3b488812e.txn new file mode 100644 index 0000000000000000000000000000000000000000..e612db09627a36ec899b3dd68ebae89ad7aa5eae Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/12-f26e7ed5-d106-42d3-82ca-4ba3b488812e.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/13-389298bf-0302-4d8b-9398-606a96588c22.txn b/.lancedb/model3_fixed.lance/_transactions/13-389298bf-0302-4d8b-9398-606a96588c22.txn new file mode 100644 index 0000000000000000000000000000000000000000..29a6a050a8e48fccda25fd3920cdc5c945b35adc Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/13-389298bf-0302-4d8b-9398-606a96588c22.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/14-edf3bab6-345c-4038-8734-43b6528113ab.txn b/.lancedb/model3_fixed.lance/_transactions/14-edf3bab6-345c-4038-8734-43b6528113ab.txn new file mode 100644 index 0000000000000000000000000000000000000000..b65198a158a1e2397a3e171fc693ae41d9f5b9d5 Binary files /dev/null and 
b/.lancedb/model3_fixed.lance/_transactions/14-edf3bab6-345c-4038-8734-43b6528113ab.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/15-25083f6f-6b0c-458e-8554-33bf8be4222c.txn b/.lancedb/model3_fixed.lance/_transactions/15-25083f6f-6b0c-458e-8554-33bf8be4222c.txn new file mode 100644 index 0000000000000000000000000000000000000000..c5ba4c45ee560d1883bdbc142d6ecae825df8a47 Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/15-25083f6f-6b0c-458e-8554-33bf8be4222c.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/16-095221db-5950-449d-bffa-99b61749e70d.txn b/.lancedb/model3_fixed.lance/_transactions/16-095221db-5950-449d-bffa-99b61749e70d.txn new file mode 100644 index 0000000000000000000000000000000000000000..1a7dccec7b72c1743b746c9d29f9af02b567abdb Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/16-095221db-5950-449d-bffa-99b61749e70d.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/17-d128fcae-41bf-4391-b275-e10660734fb7.txn b/.lancedb/model3_fixed.lance/_transactions/17-d128fcae-41bf-4391-b275-e10660734fb7.txn new file mode 100644 index 0000000000000000000000000000000000000000..c3aed787ab59cdc2000284d53dc3b5c550ca3139 Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/17-d128fcae-41bf-4391-b275-e10660734fb7.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/18-b96e1f65-a4bb-4d85-a7e7-6469c9dff2ae.txn b/.lancedb/model3_fixed.lance/_transactions/18-b96e1f65-a4bb-4d85-a7e7-6469c9dff2ae.txn new file mode 100644 index 0000000000000000000000000000000000000000..08c090424546607e4c3c339109e45e92af2245a1 Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/18-b96e1f65-a4bb-4d85-a7e7-6469c9dff2ae.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/19-92639e98-f3d0-4188-a1df-df2795689dae.txn b/.lancedb/model3_fixed.lance/_transactions/19-92639e98-f3d0-4188-a1df-df2795689dae.txn new file mode 100644 index 0000000000000000000000000000000000000000..73fbec5921cc21e8eff9e7569e4743972f60ac19 Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/19-92639e98-f3d0-4188-a1df-df2795689dae.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/2-0d2bc581-9c9b-4cc7-85db-89383fd44721.txn b/.lancedb/model3_fixed.lance/_transactions/2-0d2bc581-9c9b-4cc7-85db-89383fd44721.txn new file mode 100644 index 0000000000000000000000000000000000000000..e963100e4d95e6043cb30b3d052b9a75b58d9728 Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/2-0d2bc581-9c9b-4cc7-85db-89383fd44721.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/20-175dc643-32c0-44ce-b913-88edf381f924.txn b/.lancedb/model3_fixed.lance/_transactions/20-175dc643-32c0-44ce-b913-88edf381f924.txn new file mode 100644 index 0000000000000000000000000000000000000000..f53d6228dbd870f8451ca0f2ae3221084884363f Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/20-175dc643-32c0-44ce-b913-88edf381f924.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/21-16297c9a-9801-405b-844b-ba672c1edcf5.txn b/.lancedb/model3_fixed.lance/_transactions/21-16297c9a-9801-405b-844b-ba672c1edcf5.txn new file mode 100644 index 0000000000000000000000000000000000000000..6655080df7f84e55757f76772eb5988df29be37c Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/21-16297c9a-9801-405b-844b-ba672c1edcf5.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/22-bea2518d-8111-4974-938e-06039618c487.txn 
b/.lancedb/model3_fixed.lance/_transactions/22-bea2518d-8111-4974-938e-06039618c487.txn new file mode 100644 index 0000000000000000000000000000000000000000..bac0434d9a91947047004b91b743922d40e21dca Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/22-bea2518d-8111-4974-938e-06039618c487.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/23-d9909cd4-1911-4a2c-adcb-8040def61c21.txn b/.lancedb/model3_fixed.lance/_transactions/23-d9909cd4-1911-4a2c-adcb-8040def61c21.txn new file mode 100644 index 0000000000000000000000000000000000000000..a63a5923cae90f9415f703d6ed5f04f722351719 Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/23-d9909cd4-1911-4a2c-adcb-8040def61c21.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/24-aaeceb09-2972-4e78-a6a2-c32745b5b9af.txn b/.lancedb/model3_fixed.lance/_transactions/24-aaeceb09-2972-4e78-a6a2-c32745b5b9af.txn new file mode 100644 index 0000000000000000000000000000000000000000..551f152b160e62c4e9b7e0fdc1607319a5765311 Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/24-aaeceb09-2972-4e78-a6a2-c32745b5b9af.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/25-415b2e61-c8b3-4c3a-a434-8b9c1c0b1f6b.txn b/.lancedb/model3_fixed.lance/_transactions/25-415b2e61-c8b3-4c3a-a434-8b9c1c0b1f6b.txn new file mode 100644 index 0000000000000000000000000000000000000000..c7f344838c9e8ad00ab8130a4cbdbd349de85212 Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/25-415b2e61-c8b3-4c3a-a434-8b9c1c0b1f6b.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/26-d4d84f4c-3e78-4237-a52c-964e2c21b9e9.txn b/.lancedb/model3_fixed.lance/_transactions/26-d4d84f4c-3e78-4237-a52c-964e2c21b9e9.txn new file mode 100644 index 0000000000000000000000000000000000000000..cab7d6811b45d7471aa0b4104bbd3755e20f6b7a Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/26-d4d84f4c-3e78-4237-a52c-964e2c21b9e9.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/27-1ad00283-49a2-4140-ae64-ffbd1901c2d4.txn b/.lancedb/model3_fixed.lance/_transactions/27-1ad00283-49a2-4140-ae64-ffbd1901c2d4.txn new file mode 100644 index 0000000000000000000000000000000000000000..958f18e80d3ccf1fb76e6215fd1ba1136a655dbd Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/27-1ad00283-49a2-4140-ae64-ffbd1901c2d4.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/28-5e1ddea7-8ed5-4f6e-a161-2d3b2a31dc34.txn b/.lancedb/model3_fixed.lance/_transactions/28-5e1ddea7-8ed5-4f6e-a161-2d3b2a31dc34.txn new file mode 100644 index 0000000000000000000000000000000000000000..44e0508960672162ad7fd976119c092c4b3bf814 Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/28-5e1ddea7-8ed5-4f6e-a161-2d3b2a31dc34.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/29-360e5e95-4fbe-43cc-af8d-48115dd7e04c.txn b/.lancedb/model3_fixed.lance/_transactions/29-360e5e95-4fbe-43cc-af8d-48115dd7e04c.txn new file mode 100644 index 0000000000000000000000000000000000000000..0a9aee58e0276984ca21b6ed9f7bb4c4b4c9d1b3 Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/29-360e5e95-4fbe-43cc-af8d-48115dd7e04c.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/3-9964f0fe-e1a9-40cc-ab1a-fd2e6da4c1d2.txn b/.lancedb/model3_fixed.lance/_transactions/3-9964f0fe-e1a9-40cc-ab1a-fd2e6da4c1d2.txn new file mode 100644 index 0000000000000000000000000000000000000000..942c2a0dab6e1fe50567b991333980cd7d503d06 Binary 
files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/3-9964f0fe-e1a9-40cc-ab1a-fd2e6da4c1d2.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/4-86ff8a7a-234d-4730-8912-de4e2c3aafc3.txn b/.lancedb/model3_fixed.lance/_transactions/4-86ff8a7a-234d-4730-8912-de4e2c3aafc3.txn new file mode 100644 index 0000000000000000000000000000000000000000..6762db7516240a5ae8097394588239fa61cdb959 Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/4-86ff8a7a-234d-4730-8912-de4e2c3aafc3.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/5-d6bb690d-113b-4d1b-9203-092b894d1569.txn b/.lancedb/model3_fixed.lance/_transactions/5-d6bb690d-113b-4d1b-9203-092b894d1569.txn new file mode 100644 index 0000000000000000000000000000000000000000..c8d5101e1d579f8294090f7fe9b09831335a1cf6 Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/5-d6bb690d-113b-4d1b-9203-092b894d1569.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/6-1ccbe5f4-b4b7-49a8-8d83-6501ab3a5daa.txn b/.lancedb/model3_fixed.lance/_transactions/6-1ccbe5f4-b4b7-49a8-8d83-6501ab3a5daa.txn new file mode 100644 index 0000000000000000000000000000000000000000..8422e5a3525e0c0d7a1f68cb5c0427ba9381d0e1 Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/6-1ccbe5f4-b4b7-49a8-8d83-6501ab3a5daa.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/7-461828da-cc2d-4155-94e5-d247704ba743.txn b/.lancedb/model3_fixed.lance/_transactions/7-461828da-cc2d-4155-94e5-d247704ba743.txn new file mode 100644 index 0000000000000000000000000000000000000000..7c3856ddadef56e3953521bd51922fdefb17e6d0 Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/7-461828da-cc2d-4155-94e5-d247704ba743.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/8-f11c092f-06eb-48af-a3a1-bce5e993b551.txn b/.lancedb/model3_fixed.lance/_transactions/8-f11c092f-06eb-48af-a3a1-bce5e993b551.txn new file mode 100644 index 0000000000000000000000000000000000000000..f37cf300f9097ad78270bcff1fd571d6b0d62ed2 Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/8-f11c092f-06eb-48af-a3a1-bce5e993b551.txn differ diff --git a/.lancedb/model3_fixed.lance/_transactions/9-4dc5eb9a-be01-480d-92c0-3cbab6bf146b.txn b/.lancedb/model3_fixed.lance/_transactions/9-4dc5eb9a-be01-480d-92c0-3cbab6bf146b.txn new file mode 100644 index 0000000000000000000000000000000000000000..0c096bfd7526b1baa31bd219e72e16dc53dd431e Binary files /dev/null and b/.lancedb/model3_fixed.lance/_transactions/9-4dc5eb9a-be01-480d-92c0-3cbab6bf146b.txn differ diff --git a/.lancedb/model3_fixed.lance/data/051f523b-013f-4a7c-820f-7eb825c8fc6e.lance b/.lancedb/model3_fixed.lance/data/051f523b-013f-4a7c-820f-7eb825c8fc6e.lance new file mode 100644 index 0000000000000000000000000000000000000000..2c8aa2a452cf9f46d9e0aeb7a9de4fefcb154f8f --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/051f523b-013f-4a7c-820f-7eb825c8fc6e.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00efabc9b0545250bdadb48c5e028617ecfc17ca5a25bbcb491c858ed40b8486 +size 297252 diff --git a/.lancedb/model3_fixed.lance/data/2748964f-c3fe-4484-9d44-32d330f2bed5.lance b/.lancedb/model3_fixed.lance/data/2748964f-c3fe-4484-9d44-32d330f2bed5.lance new file mode 100644 index 0000000000000000000000000000000000000000..2f200069d39cd14064cd25434e3b25395751ea6c --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/2748964f-c3fe-4484-9d44-32d330f2bed5.lance @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:1dd6a89ce1237015cc6b66682fb5a28d8cfa993f755d8838eb01ef1ccfdd2ae2 +size 308266 diff --git a/.lancedb/model3_fixed.lance/data/2f15a41f-aece-4aae-aaac-4782361d15b7.lance b/.lancedb/model3_fixed.lance/data/2f15a41f-aece-4aae-aaac-4782361d15b7.lance new file mode 100644 index 0000000000000000000000000000000000000000..9e70ba3ded6c3689cc3dd7dd1556decd7659c48d --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/2f15a41f-aece-4aae-aaac-4782361d15b7.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1e1ee44cc234908f8dbe20e226ccef7351b221ee146944736492667d0d88f72 +size 307020 diff --git a/.lancedb/model3_fixed.lance/data/35c51ba4-8e4e-42a0-8204-d3f7779f00e6.lance b/.lancedb/model3_fixed.lance/data/35c51ba4-8e4e-42a0-8204-d3f7779f00e6.lance new file mode 100644 index 0000000000000000000000000000000000000000..f343d62cde9dc5d9f6c99030bc55e27da72f1e3d --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/35c51ba4-8e4e-42a0-8204-d3f7779f00e6.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0fcede1c15b51ff1d018f1ca86998391b6cd0b998d2c203d785f448b208e3cd +size 289779 diff --git a/.lancedb/model3_fixed.lance/data/3b6268f8-55ed-4204-b969-8f4db11beea1.lance b/.lancedb/model3_fixed.lance/data/3b6268f8-55ed-4204-b969-8f4db11beea1.lance new file mode 100644 index 0000000000000000000000000000000000000000..3cd4af02c1f5a24c29aa80710288d2aa7478e23d --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/3b6268f8-55ed-4204-b969-8f4db11beea1.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9b9d7baa45f2fa824d41c189e9925b5253bb89782f7731fe8b99ff16823ebe0 +size 296285 diff --git a/.lancedb/model3_fixed.lance/data/475c1989-0f1a-46a8-8454-511236597135.lance b/.lancedb/model3_fixed.lance/data/475c1989-0f1a-46a8-8454-511236597135.lance new file mode 100644 index 0000000000000000000000000000000000000000..6f5b516db7df8951e68ac1c862723041220cbae7 --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/475c1989-0f1a-46a8-8454-511236597135.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9be7c3ed0aee21bed50a85c0b17402bae8a6daa9c0724f3c4ff218d32d226331 +size 303734 diff --git a/.lancedb/model3_fixed.lance/data/48080818-7c5f-49cf-8a42-b036aa9e5ac3.lance b/.lancedb/model3_fixed.lance/data/48080818-7c5f-49cf-8a42-b036aa9e5ac3.lance new file mode 100644 index 0000000000000000000000000000000000000000..ed6ba60807d283fbf93b7536a558c558510fd660 --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/48080818-7c5f-49cf-8a42-b036aa9e5ac3.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69521f91011b0f64e9ac63ca616444344945372329599d63b570d02a5ad02248 +size 296131 diff --git a/.lancedb/model3_fixed.lance/data/5312f201-1221-482a-9e4a-7764b7490088.lance b/.lancedb/model3_fixed.lance/data/5312f201-1221-482a-9e4a-7764b7490088.lance new file mode 100644 index 0000000000000000000000000000000000000000..47c35b1903acbb2afa5407c3b088123357711afa --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/5312f201-1221-482a-9e4a-7764b7490088.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36d65f96c7d113384ad1a0dc08a59aa5c4faae2084022de25dba50e83dbe3f55 +size 294400 diff --git a/.lancedb/model3_fixed.lance/data/625f549f-ec2b-4d8c-9585-60777a9736ad.lance b/.lancedb/model3_fixed.lance/data/625f549f-ec2b-4d8c-9585-60777a9736ad.lance new file mode 100644 index 0000000000000000000000000000000000000000..8be7096a9302c50537c98c415f0ddd599b75d3f9 --- /dev/null 
+++ b/.lancedb/model3_fixed.lance/data/625f549f-ec2b-4d8c-9585-60777a9736ad.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a280f44560b221ed512a86d7b183c705e93a7e0f801afa726a32e0b42811608 +size 295413 diff --git a/.lancedb/model3_fixed.lance/data/637c5d89-4641-4e86-8ecd-e44da395b418.lance b/.lancedb/model3_fixed.lance/data/637c5d89-4641-4e86-8ecd-e44da395b418.lance new file mode 100644 index 0000000000000000000000000000000000000000..566efbecd81b1b24b769f6ccdd4487852e701029 --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/637c5d89-4641-4e86-8ecd-e44da395b418.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:218202b7e7bd8951985420523ad0a5a260082e858532f624e418c98d45ab7846 +size 299604 diff --git a/.lancedb/model3_fixed.lance/data/67f89c33-802b-439c-ac4a-5c7bed36ad68.lance b/.lancedb/model3_fixed.lance/data/67f89c33-802b-439c-ac4a-5c7bed36ad68.lance new file mode 100644 index 0000000000000000000000000000000000000000..090df32e5c5718b2212be32ccd5123abfe141206 --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/67f89c33-802b-439c-ac4a-5c7bed36ad68.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9aa39258846f687338a536b9f7546c6a1c495233e84a7aeee6b966f80efefc12 +size 286528 diff --git a/.lancedb/model3_fixed.lance/data/6b3d89bf-310c-49a5-b63d-6e70ac1aa966.lance b/.lancedb/model3_fixed.lance/data/6b3d89bf-310c-49a5-b63d-6e70ac1aa966.lance new file mode 100644 index 0000000000000000000000000000000000000000..354238a01e2e8dbeac351160026a327957fd948e --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/6b3d89bf-310c-49a5-b63d-6e70ac1aa966.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ac584ecbb056e986a101fa43e4fe3adf415d39b4068ce3b7b5d7c2f8e088892 +size 288082 diff --git a/.lancedb/model3_fixed.lance/data/704945f8-f889-4b90-ac5f-c06c676920a6.lance b/.lancedb/model3_fixed.lance/data/704945f8-f889-4b90-ac5f-c06c676920a6.lance new file mode 100644 index 0000000000000000000000000000000000000000..0646c656431ad62ed798f878241a7560d4c1c0ae --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/704945f8-f889-4b90-ac5f-c06c676920a6.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:992bc6dbe4dbaf4bc202bf4494e47bbcbc620be086c858a2c187526e269f10db +size 293214 diff --git a/.lancedb/model3_fixed.lance/data/87693f0c-3640-4dd4-a5ba-a5937693ac06.lance b/.lancedb/model3_fixed.lance/data/87693f0c-3640-4dd4-a5ba-a5937693ac06.lance new file mode 100644 index 0000000000000000000000000000000000000000..869de3026f5e2f08b1358ad105fb328e7d9109a4 --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/87693f0c-3640-4dd4-a5ba-a5937693ac06.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:681889e7d9d263688dd336b221b39227cff4d488e3ee49fcba3fd3b0ae26decd +size 293579 diff --git a/.lancedb/model3_fixed.lance/data/8c0fa0f9-e26f-4b6f-985b-cb9043f9243f.lance b/.lancedb/model3_fixed.lance/data/8c0fa0f9-e26f-4b6f-985b-cb9043f9243f.lance new file mode 100644 index 0000000000000000000000000000000000000000..4147203a8353d4f647d80304f24ddac41e4e5042 --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/8c0fa0f9-e26f-4b6f-985b-cb9043f9243f.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa2c5bdf5d89bf84184351ffd939b1c180fbef7f84bb835d8df8ad7e8ce28a1d +size 292686 diff --git a/.lancedb/model3_fixed.lance/data/93d778af-ffc8-4347-8d2a-ea19cdd94835.lance b/.lancedb/model3_fixed.lance/data/93d778af-ffc8-4347-8d2a-ea19cdd94835.lance new file mode 
100644 index 0000000000000000000000000000000000000000..df6791faeefc3c93dc68e8cbfa18ece6d7db3b84 --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/93d778af-ffc8-4347-8d2a-ea19cdd94835.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a0265b14f5a63ff749ce45594a236c8471831f69ec20825c2dce1b19b232fd4 +size 289104 diff --git a/.lancedb/model3_fixed.lance/data/94611ffc-e182-447c-9949-d88a71f61da0.lance b/.lancedb/model3_fixed.lance/data/94611ffc-e182-447c-9949-d88a71f61da0.lance new file mode 100644 index 0000000000000000000000000000000000000000..f6d66848eb5c0588f006b1df6310229b540c2b69 --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/94611ffc-e182-447c-9949-d88a71f61da0.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e16f010824cc59401fbf2e454005890e05d14d0e1786d4a66343755a56d055ad +size 288502 diff --git a/.lancedb/model3_fixed.lance/data/946bfb0a-38db-4485-b0b6-04325da3556c.lance b/.lancedb/model3_fixed.lance/data/946bfb0a-38db-4485-b0b6-04325da3556c.lance new file mode 100644 index 0000000000000000000000000000000000000000..68feea8215bfa97e0c92eacff7b4da679caa55b9 --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/946bfb0a-38db-4485-b0b6-04325da3556c.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f74db7ced1e131c1f65c5eb5be693ca784370b24caccc70c05e2bfb6a8b6638f +size 285333 diff --git a/.lancedb/model3_fixed.lance/data/98fc4df8-3245-494f-b56a-a27568259e01.lance b/.lancedb/model3_fixed.lance/data/98fc4df8-3245-494f-b56a-a27568259e01.lance new file mode 100644 index 0000000000000000000000000000000000000000..54ff8d8f0cbd81a3d2f3e0f33b0b7030d8ad4b37 --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/98fc4df8-3245-494f-b56a-a27568259e01.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5245c2315db732e31858729da405ff940b3ca50b54f4d9bfdc1d12f49185b6c +size 291001 diff --git a/.lancedb/model3_fixed.lance/data/acf6594c-7ff3-4894-b9f6-603e850ed0f4.lance b/.lancedb/model3_fixed.lance/data/acf6594c-7ff3-4894-b9f6-603e850ed0f4.lance new file mode 100644 index 0000000000000000000000000000000000000000..b895ed5bee3e88d8d15f012332e41e13486af3a6 --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/acf6594c-7ff3-4894-b9f6-603e850ed0f4.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e67de0b2c7259c54e8e6f72523c66e4035566c75d96ddad476180514fea442f1 +size 293556 diff --git a/.lancedb/model3_fixed.lance/data/b5993424-a12e-472d-8612-933c57ff76a8.lance b/.lancedb/model3_fixed.lance/data/b5993424-a12e-472d-8612-933c57ff76a8.lance new file mode 100644 index 0000000000000000000000000000000000000000..569b81f41f163f18d8baeb1c483225b8b20f6d7f --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/b5993424-a12e-472d-8612-933c57ff76a8.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06b3557010da5c41f855ff9d78095507659aea8bd1d4385e8dedfddcc89db5ee +size 280046 diff --git a/.lancedb/model3_fixed.lance/data/ce18ae5c-2542-4b75-aaa6-a7098dc501ae.lance b/.lancedb/model3_fixed.lance/data/ce18ae5c-2542-4b75-aaa6-a7098dc501ae.lance new file mode 100644 index 0000000000000000000000000000000000000000..7730c82286d3f0be6082657d872fc440ff6bdc1d --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/ce18ae5c-2542-4b75-aaa6-a7098dc501ae.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42788d78137c94a07d25148f706381253525730b92cb87fffe3375fd719eb60f +size 51558 diff --git 
a/.lancedb/model3_fixed.lance/data/d77840ce-eb32-444c-8b3f-0f9eb8926093.lance b/.lancedb/model3_fixed.lance/data/d77840ce-eb32-444c-8b3f-0f9eb8926093.lance new file mode 100644 index 0000000000000000000000000000000000000000..751cdd9a7c34df264d4e3f4be2bbd0b6f7041e7a --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/d77840ce-eb32-444c-8b3f-0f9eb8926093.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:238e2b7b1ca0752cbb5f8ccb18ab334b4109ea135b2be0d087b31fc7f1214993 +size 293974 diff --git a/.lancedb/model3_fixed.lance/data/e2919293-c1f4-44d3-af32-47ae7ed2dced.lance b/.lancedb/model3_fixed.lance/data/e2919293-c1f4-44d3-af32-47ae7ed2dced.lance new file mode 100644 index 0000000000000000000000000000000000000000..58c740032b88df860a94cd1b6c4e11e7d9e63d26 --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/e2919293-c1f4-44d3-af32-47ae7ed2dced.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d80eb23099dd975450dafb419187ef65d4c4c0cd22ca9830a3c2681d80099fa8 +size 300405 diff --git a/.lancedb/model3_fixed.lance/data/e7e5b9a1-649b-4979-99c0-5044560f750f.lance b/.lancedb/model3_fixed.lance/data/e7e5b9a1-649b-4979-99c0-5044560f750f.lance new file mode 100644 index 0000000000000000000000000000000000000000..9097c83bbe3c5e82af90f62c2e95b21213a15282 --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/e7e5b9a1-649b-4979-99c0-5044560f750f.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75f2fefaba7ca0f53e78d4e475936085106f9443eba9d4dba620be0ffec649d3 +size 286015 diff --git a/.lancedb/model3_fixed.lance/data/e99be02c-0556-4925-a200-3f2fd64a3959.lance b/.lancedb/model3_fixed.lance/data/e99be02c-0556-4925-a200-3f2fd64a3959.lance new file mode 100644 index 0000000000000000000000000000000000000000..d9de2b23382071961ade9b50bf1196bb13d6c22e --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/e99be02c-0556-4925-a200-3f2fd64a3959.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5b4069e0c17b8d2e2be00a5fffa495bbe43eb480f24b3043942fda77b0fd781 +size 297486 diff --git a/.lancedb/model3_fixed.lance/data/f0c6c3d1-a85d-44ad-af28-2abfb8dc5fd3.lance b/.lancedb/model3_fixed.lance/data/f0c6c3d1-a85d-44ad-af28-2abfb8dc5fd3.lance new file mode 100644 index 0000000000000000000000000000000000000000..5c49ee48d11fe69cde0d6c4978e5b73b75152716 --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/f0c6c3d1-a85d-44ad-af28-2abfb8dc5fd3.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab1a1f6d7d32346cf529d3491ad6b3cd1ab5299e652d774736396e24af507a3c +size 303530 diff --git a/.lancedb/model3_fixed.lance/data/f6bccba1-4521-4f7c-bd09-01e5f17b426b.lance b/.lancedb/model3_fixed.lance/data/f6bccba1-4521-4f7c-bd09-01e5f17b426b.lance new file mode 100644 index 0000000000000000000000000000000000000000..6335d373f9d32c2012ce049b40ab316cffefcd31 --- /dev/null +++ b/.lancedb/model3_fixed.lance/data/f6bccba1-4521-4f7c-bd09-01e5f17b426b.lance @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c7b3564cacce1a8d8512f5f8c43351bb848216b76d520ee780348e80c381fe1 +size 301242 diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/__config.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/__config.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..95f895a67fd3a3a6af48cced964a63da4368646f --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/__config.txt_chunk_0.txt @@ -0,0 +1,13 @@ +docstyle-ignore +INSTALL_CONTENT = """ +Transformers installation +! 
pip install transformers datasets evaluate accelerate +To install from source instead of the last release, comment the command above and uncomment the following one. +! pip install git+https://github.com/huggingface/transformers.git +""" +notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] +black_avoid_patterns = { + "{processor_class}": "FakeProcessorClass", + "{model_class}": "FakeModelClass", + "{object_class}": "FakeObjectClass", +} \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/__redirects.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/__redirects.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..9db31a9a52f2452f6a899eb981c042cc6ec8d396 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/__redirects.txt_chunk_0.txt @@ -0,0 +1,4 @@ +Optimizing inference +perf_infer_gpu_many: perf_infer_gpu_one +transformers_agents: agents +quantization: quantization/overview \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/__toctree.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/__toctree.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..a759c806f48a84ce1d82568f742bb5517913367e --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/__toctree.txt_chunk_0.txt @@ -0,0 +1,52 @@ +sections: +local: index + title: 🤗 Transformers +local: quicktour + title: Quick tour +local: installation + title: Installation + title: Get started +sections: +local: pipeline_tutorial + title: Run inference with pipelines +local: autoclass_tutorial + title: Write portable code with AutoClass +local: preprocessing + title: Preprocess data +local: training + title: Fine-tune a pretrained model +local: run_scripts + title: Train with a script +local: accelerate + title: Set up distributed training with 🤗 Accelerate +local: peft + title: Load and train adapters with 🤗 PEFT +local: model_sharing + title: Share your model +local: agents + title: Agents +local: llm_tutorial + title: Generation with LLMs +local: conversations + title: Chatting with Transformers + title: Tutorials +sections: +isExpanded: false + sections: +local: tasks/sequence_classification + title: Text classification +local: tasks/token_classification + title: Token classification +local: tasks/question_answering + title: Question answering +local: tasks/language_modeling + title: Causal language modeling +local: tasks/masked_language_modeling + title: Masked language modeling +local: tasks/translation + title: Translation +local: tasks/summarization + title: Summarization +local: tasks/multiple_choice + title: Multiple choice +title: Natural Language Processing \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/__toctree.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/__toctree.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ed5a06e6455ac11e0a651d7dbb328a22562699c --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/__toctree.txt_chunk_1.txt @@ -0,0 +1,33 @@ +isExpanded: false + sections: +local: tasks/audio_classification + title: Audio classification +local: tasks/asr + title: Automatic speech recognition +title: Audio + +isExpanded: false + sections: +local: tasks/image_classification + title: Image classification +local: tasks/semantic_segmentation + title: Image segmentation +local: tasks/video_classification + title: Video classification +local: tasks/object_detection + title: Object detection +local: 
tasks/zero_shot_object_detection + title: Zero-shot object detection +local: tasks/zero_shot_image_classification + title: Zero-shot image classification +local: tasks/monocular_depth_estimation + title: Depth estimation +local: tasks/image_to_image + title: Image-to-Image +local: tasks/image_feature_extraction + title: Image Feature Extraction +local: tasks/mask_generation + title: Mask Generation +local: tasks/knowledge_distillation_for_image_classification + title: Knowledge Distillation for Computer Vision +title: Computer Vision \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/__toctree.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/__toctree.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..62ca84936ea883b80eb703c824686b6ddc756887 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/__toctree.txt_chunk_2.txt @@ -0,0 +1,26 @@ +isExpanded: false + sections: +local: tasks/image_captioning + title: Image captioning +local: tasks/document_question_answering + title: Document Question Answering +local: tasks/visual_question_answering + title: Visual Question Answering +local: tasks/text-to-speech + title: Text to speech +title: Multimodal + +isExpanded: false + sections: +local: generation_strategies + title: Customize the generation strategy +title: Generation + +isExpanded: false + sections: +local: tasks/idefics + title: Image tasks with IDEFICS +local: tasks/prompting + title: LLM prompting guide +title: Prompting + title: Task Guides \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/__toctree.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/__toctree.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..ba169428031d3bcf0c37756c289c2242371cde64 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/__toctree.txt_chunk_3.txt @@ -0,0 +1,81 @@ +sections: +local: fast_tokenizers + title: Use fast tokenizers from 🤗 Tokenizers +local: multilingual + title: Run inference with multilingual models +local: create_a_model + title: Use model-specific APIs +local: custom_models + title: Share a custom model +local: chat_templating + title: Templates for chat models +local: trainer + title: Trainer +local: sagemaker + title: Run training on Amazon SageMaker +local: serialization + title: Export to ONNX +local: tflite + title: Export to TFLite +local: torchscript + title: Export to TorchScript +local: benchmarks + title: Benchmarks +local: notebooks + title: Notebooks with examples +local: community + title: Community resources +local: troubleshooting + title: Troubleshoot +local: gguf + title: Interoperability with GGUF files + title: Developer guides +sections: +local: quantization/overview + title: Getting started +local: quantization/bitsandbytes + title: bitsandbytes +local: quantization/gptq + title: GPTQ +local: quantization/awq + title: AWQ +local: quantization/aqlm + title: AQLM +local: quantization/quanto + title: Quanto +local: quantization/eetq + title: EETQ +local: quantization/hqq + title: HQQ +local: quantization/optimum + title: Optimum +local: quantization/contribute + title: Contribute new quantization method + title: Quantization Methods +sections: +local: performance + title: Overview +local: llm_optims + title: LLM inference optimization +sections: +local: perf_train_gpu_one + title: Methods and tools for efficient training on a single GPU +local: perf_train_gpu_many + title: Multiple GPUs and parallelism +local: fsdp + title: Fully Sharded Data 
Parallel +local: deepspeed + title: DeepSpeed +local: perf_train_cpu + title: Efficient training on CPU +local: perf_train_cpu_many + title: Distributed CPU training +local: perf_train_tpu_tf + title: Training on TPU with TensorFlow +local: perf_train_special + title: PyTorch training on Apple silicon +local: perf_hardware + title: Custom hardware for training +local: hpo_train + title: Hyperparameter Search using Trainer API +title: Efficient training techniques \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/__toctree.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/__toctree.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..5b1bf36552ea906765ee60621f660b8cba2f3ac1 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/__toctree.txt_chunk_4.txt @@ -0,0 +1,6 @@ +sections: +local: perf_infer_cpu + title: CPU inference +local: perf_infer_gpu_one + title: GPU inference +title: Optimizing inference \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/__toctree.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/__toctree.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..ef73cd7ce1d71ee625d51ea96e98eb2376891251 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/__toctree.txt_chunk_5.txt @@ -0,0 +1,94 @@ +local: big_models + title: Instantiate a big model +local: debugging + title: Debugging +local: tf_xla + title: XLA Integration for TensorFlow Models +local: perf_torch_compile + title: Optimize inference using torch.compile() + title: Performance and scalability +sections: +local: contributing + title: How to contribute to 🤗 Transformers? +local: add_new_model + title: How to add a model to 🤗 Transformers? +local: add_new_pipeline + title: How to add a pipeline to 🤗 Transformers? 
+local: testing + title: Testing +local: pr_checks + title: Checks on a Pull Request + title: Contribute +sections: +local: philosophy + title: Philosophy +local: glossary + title: Glossary +local: task_summary + title: What 🤗 Transformers can do +local: tasks_explained + title: How 🤗 Transformers solve tasks +local: model_summary + title: The Transformer model family +local: tokenizer_summary + title: Summary of the tokenizers +local: attention + title: Attention mechanisms +local: pad_truncation + title: Padding and truncation +local: bertology + title: BERTology +local: perplexity + title: Perplexity of fixed-length models +local: pipeline_webserver + title: Pipelines for webserver inference +local: model_memory_anatomy + title: Model training anatomy +local: llm_tutorial_optimization + title: Getting the most out of LLMs + title: Conceptual guides +sections: +sections: +local: main_classes/agent + title: Agents and Tools +local: model_doc/auto + title: Auto Classes +local: main_classes/backbones + title: Backbones +local: main_classes/callback + title: Callbacks +local: main_classes/configuration + title: Configuration +local: main_classes/data_collator + title: Data Collator +local: main_classes/keras_callbacks + title: Keras callbacks +local: main_classes/logging + title: Logging +local: main_classes/model + title: Models +local: main_classes/text_generation + title: Text Generation +local: main_classes/onnx + title: ONNX +local: main_classes/optimizer_schedules + title: Optimization +local: main_classes/output + title: Model outputs +local: main_classes/pipelines + title: Pipelines +local: main_classes/processors + title: Processors +local: main_classes/quantization + title: Quantization +local: main_classes/tokenizer + title: Tokenizer +local: main_classes/trainer + title: Trainer +local: main_classes/deepspeed + title: DeepSpeed +local: main_classes/feature_extractor + title: Feature Extractor +local: main_classes/image_processor + title: Image Processor +title: Main Classes \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/__toctree.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/__toctree.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..a8dcfe9867ed00f5778d4b77bc2b95b65314c6cf --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/__toctree.txt_chunk_6.txt @@ -0,0 +1,582 @@ +sections: +isExpanded: false + sections: +local: model_doc/albert + title: ALBERT +local: model_doc/bart + title: BART +local: model_doc/barthez + title: BARThez +local: model_doc/bartpho + title: BARTpho +local: model_doc/bert + title: BERT +local: model_doc/bert-generation + title: BertGeneration +local: model_doc/bert-japanese + title: BertJapanese +local: model_doc/bertweet + title: Bertweet +local: model_doc/big_bird + title: BigBird +local: model_doc/bigbird_pegasus + title: BigBirdPegasus +local: model_doc/biogpt + title: BioGpt +local: model_doc/blenderbot + title: Blenderbot +local: model_doc/blenderbot-small + title: Blenderbot Small +local: model_doc/bloom + title: BLOOM +local: model_doc/bort + title: BORT +local: model_doc/byt5 + title: ByT5 +local: model_doc/camembert + title: CamemBERT +local: model_doc/canine + title: CANINE +local: model_doc/codegen + title: CodeGen +local: model_doc/code_llama + title: CodeLlama +local: model_doc/cohere + title: Cohere +local: model_doc/convbert + title: ConvBERT +local: model_doc/cpm + title: CPM +local: model_doc/cpmant + title: CPMANT +local: model_doc/ctrl + title: CTRL +local: 
model_doc/dbrx + title: DBRX +local: model_doc/deberta + title: DeBERTa +local: model_doc/deberta-v2 + title: DeBERTa-v2 +local: model_doc/dialogpt + title: DialoGPT +local: model_doc/distilbert + title: DistilBERT +local: model_doc/dpr + title: DPR +local: model_doc/electra + title: ELECTRA +local: model_doc/encoder-decoder + title: Encoder Decoder Models +local: model_doc/ernie + title: ERNIE +local: model_doc/ernie_m + title: ErnieM +local: model_doc/esm + title: ESM +local: model_doc/falcon + title: Falcon +local: model_doc/fastspeech2_conformer + title: FastSpeech2Conformer +local: model_doc/flan-t5 + title: FLAN-T5 +local: model_doc/flan-ul2 + title: FLAN-UL2 +local: model_doc/flaubert + title: FlauBERT +local: model_doc/fnet + title: FNet +local: model_doc/fsmt + title: FSMT +local: model_doc/funnel + title: Funnel Transformer +local: model_doc/fuyu + title: Fuyu +local: model_doc/gemma + title: Gemma +local: model_doc/openai-gpt + title: GPT +local: model_doc/gpt_neo + title: GPT Neo +local: model_doc/gpt_neox + title: GPT NeoX +local: model_doc/gpt_neox_japanese + title: GPT NeoX Japanese +local: model_doc/gptj + title: GPT-J +local: model_doc/gpt2 + title: GPT2 +local: model_doc/gpt_bigcode + title: GPTBigCode +local: model_doc/gptsan-japanese + title: GPTSAN Japanese +local: model_doc/gpt-sw3 + title: GPTSw3 +local: model_doc/herbert + title: HerBERT +local: model_doc/ibert + title: I-BERT +local: model_doc/jamba + title: Jamba +local: model_doc/jetmoe + title: JetMoe +local: model_doc/jukebox + title: Jukebox +local: model_doc/led + title: LED +local: model_doc/llama + title: LLaMA +local: model_doc/llama2 + title: Llama2 +local: model_doc/llama3 + title: Llama3 +local: model_doc/longformer + title: Longformer +local: model_doc/longt5 + title: LongT5 +local: model_doc/luke + title: LUKE +local: model_doc/m2m_100 + title: M2M100 +local: model_doc/madlad-400 + title: MADLAD-400 +local: model_doc/mamba + title: Mamba +local: model_doc/marian + title: MarianMT +local: model_doc/markuplm + title: MarkupLM +local: model_doc/mbart + title: MBart and MBart-50 +local: model_doc/mega + title: MEGA +local: model_doc/megatron-bert + title: MegatronBERT +local: model_doc/megatron_gpt2 + title: MegatronGPT2 +local: model_doc/mistral + title: Mistral +local: model_doc/mixtral + title: Mixtral +local: model_doc/mluke + title: mLUKE +local: model_doc/mobilebert + title: MobileBERT +local: model_doc/mpnet + title: MPNet +local: model_doc/mpt + title: MPT +local: model_doc/mra + title: MRA +local: model_doc/mt5 + title: MT5 +local: model_doc/mvp + title: MVP +local: model_doc/nezha + title: NEZHA +local: model_doc/nllb + title: NLLB +local: model_doc/nllb-moe + title: NLLB-MoE +local: model_doc/nystromformer + title: Nyströmformer +local: model_doc/olmo + title: OLMo +local: model_doc/open-llama + title: Open-Llama +local: model_doc/opt + title: OPT +local: model_doc/pegasus + title: Pegasus +local: model_doc/pegasus_x + title: PEGASUS-X +local: model_doc/persimmon + title: Persimmon +local: model_doc/phi + title: Phi +local: model_doc/phi3 + title: Phi-3 +local: model_doc/phobert + title: PhoBERT +local: model_doc/plbart + title: PLBart +local: model_doc/prophetnet + title: ProphetNet +local: model_doc/qdqbert + title: QDQBert +local: model_doc/qwen2 + title: Qwen2 +local: model_doc/qwen2_moe + title: Qwen2MoE +local: model_doc/rag + title: RAG +local: model_doc/realm + title: REALM +local: model_doc/recurrent_gemma + title: RecurrentGemma +local: model_doc/reformer + title: Reformer +local: 
model_doc/rembert + title: RemBERT +local: model_doc/retribert + title: RetriBERT +local: model_doc/roberta + title: RoBERTa +local: model_doc/roberta-prelayernorm + title: RoBERTa-PreLayerNorm +local: model_doc/roc_bert + title: RoCBert +local: model_doc/roformer + title: RoFormer +local: model_doc/rwkv + title: RWKV +local: model_doc/splinter + title: Splinter +local: model_doc/squeezebert + title: SqueezeBERT +local: model_doc/stablelm + title: StableLm +local: model_doc/starcoder2 + title: Starcoder2 +local: model_doc/switch_transformers + title: SwitchTransformers +local: model_doc/t5 + title: T5 +local: model_doc/t5v1.1 + title: T5v1.1 +local: model_doc/tapex + title: TAPEX +local: model_doc/transfo-xl + title: Transformer XL +local: model_doc/ul2 + title: UL2 +local: model_doc/umt5 + title: UMT5 +local: model_doc/xmod + title: X-MOD +local: model_doc/xglm + title: XGLM +local: model_doc/xlm + title: XLM +local: model_doc/xlm-prophetnet + title: XLM-ProphetNet +local: model_doc/xlm-roberta + title: XLM-RoBERTa +local: model_doc/xlm-roberta-xl + title: XLM-RoBERTa-XL +local: model_doc/xlm-v + title: XLM-V +local: model_doc/xlnet + title: XLNet +local: model_doc/yoso + title: YOSO + title: Text models +isExpanded: false + sections: +local: model_doc/beit + title: BEiT +local: model_doc/bit + title: BiT +local: model_doc/conditional_detr + title: Conditional DETR +local: model_doc/convnext + title: ConvNeXT +local: model_doc/convnextv2 + title: ConvNeXTV2 +local: model_doc/cvt + title: CvT +local: model_doc/deformable_detr + title: Deformable DETR +local: model_doc/deit + title: DeiT +local: model_doc/depth_anything + title: Depth Anything +local: model_doc/deta + title: DETA +local: model_doc/detr + title: DETR +local: model_doc/dinat + title: DiNAT +local: model_doc/dinov2 + title: DINOV2 +local: model_doc/dit + title: DiT +local: model_doc/dpt + title: DPT +local: model_doc/efficientformer + title: EfficientFormer +local: model_doc/efficientnet + title: EfficientNet +local: model_doc/focalnet + title: FocalNet +local: model_doc/glpn + title: GLPN +local: model_doc/imagegpt + title: ImageGPT +local: model_doc/levit + title: LeViT +local: model_doc/mask2former + title: Mask2Former +local: model_doc/maskformer + title: MaskFormer +local: model_doc/mobilenet_v1 + title: MobileNetV1 +local: model_doc/mobilenet_v2 + title: MobileNetV2 +local: model_doc/mobilevit + title: MobileViT +local: model_doc/mobilevitv2 + title: MobileViTV2 +local: model_doc/nat + title: NAT +local: model_doc/poolformer + title: PoolFormer +local: model_doc/pvt + title: Pyramid Vision Transformer (PVT) +local: model_doc/pvt_v2 + title: Pyramid Vision Transformer v2 (PVTv2) +local: model_doc/regnet + title: RegNet +local: model_doc/resnet + title: ResNet +local: model_doc/rt_detr + title: RT-DETR +local: model_doc/segformer + title: SegFormer +local: model_doc/seggpt + title: SegGpt +local: model_doc/superpoint + title: SuperPoint +local: model_doc/swiftformer + title: SwiftFormer +local: model_doc/swin + title: Swin Transformer +local: model_doc/swinv2 + title: Swin Transformer V2 +local: model_doc/swin2sr + title: Swin2SR +local: model_doc/table-transformer + title: Table Transformer +local: model_doc/upernet + title: UperNet +local: model_doc/van + title: VAN +local: model_doc/vit + title: Vision Transformer (ViT) +local: model_doc/vit_hybrid + title: ViT Hybrid +local: model_doc/vitdet + title: ViTDet +local: model_doc/vit_mae + title: ViTMAE +local: model_doc/vitmatte + title: ViTMatte +local: model_doc/vit_msn 
+ title: ViTMSN +local: model_doc/yolos + title: YOLOS + title: Vision models +isExpanded: false + sections: +local: model_doc/audio-spectrogram-transformer + title: Audio Spectrogram Transformer +local: model_doc/bark + title: Bark +local: model_doc/clap + title: CLAP +local: model_doc/encodec + title: EnCodec +local: model_doc/hubert + title: Hubert +local: model_doc/mctct + title: MCTCT +local: model_doc/mms + title: MMS +local: model_doc/musicgen + title: MusicGen +local: model_doc/musicgen_melody + title: MusicGen Melody +local: model_doc/pop2piano + title: Pop2Piano +local: model_doc/seamless_m4t + title: Seamless-M4T +local: model_doc/seamless_m4t_v2 + title: SeamlessM4T-v2 +local: model_doc/sew + title: SEW +local: model_doc/sew-d + title: SEW-D +local: model_doc/speech_to_text + title: Speech2Text +local: model_doc/speech_to_text_2 + title: Speech2Text2 +local: model_doc/speecht5 + title: SpeechT5 +local: model_doc/unispeech + title: UniSpeech +local: model_doc/unispeech-sat + title: UniSpeech-SAT +local: model_doc/univnet + title: UnivNet +local: model_doc/vits + title: VITS +local: model_doc/wav2vec2 + title: Wav2Vec2 +local: model_doc/wav2vec2-bert + title: Wav2Vec2-BERT +local: model_doc/wav2vec2-conformer + title: Wav2Vec2-Conformer +local: model_doc/wav2vec2_phoneme + title: Wav2Vec2Phoneme +local: model_doc/wavlm + title: WavLM +local: model_doc/whisper + title: Whisper +local: model_doc/xls_r + title: XLS-R +local: model_doc/xlsr_wav2vec2 + title: XLSR-Wav2Vec2 + title: Audio models +isExpanded: false + sections: +local: model_doc/timesformer + title: TimeSformer +local: model_doc/videomae + title: VideoMAE +local: model_doc/vivit + title: ViViT + title: Video models +isExpanded: false + sections: +local: model_doc/align + title: ALIGN +local: model_doc/altclip + title: AltCLIP +local: model_doc/blip + title: BLIP +local: model_doc/blip-2 + title: BLIP-2 +local: model_doc/bridgetower + title: BridgeTower +local: model_doc/bros + title: BROS +local: model_doc/chinese_clip + title: Chinese-CLIP +local: model_doc/clip + title: CLIP +local: model_doc/clipseg + title: CLIPSeg +local: model_doc/clvp + title: CLVP +local: model_doc/data2vec + title: Data2Vec +local: model_doc/deplot + title: DePlot +local: model_doc/donut + title: Donut +local: model_doc/flava + title: FLAVA +local: model_doc/git + title: GIT +local: model_doc/grounding-dino + title: Grounding DINO +local: model_doc/groupvit + title: GroupViT +local: model_doc/idefics + title: IDEFICS +local: model_doc/idefics2 + title: Idefics2 +local: model_doc/instructblip + title: InstructBLIP +local: model_doc/kosmos-2 + title: KOSMOS-2 +local: model_doc/layoutlm + title: LayoutLM +local: model_doc/layoutlmv2 + title: LayoutLMV2 +local: model_doc/layoutlmv3 + title: LayoutLMV3 +local: model_doc/layoutxlm + title: LayoutXLM +local: model_doc/lilt + title: LiLT +local: model_doc/llava + title: Llava +local: model_doc/llava_next + title: LLaVA-NeXT +local: model_doc/lxmert + title: LXMERT +local: model_doc/matcha + title: MatCha +local: model_doc/mgp-str + title: MGP-STR +local: model_doc/nougat + title: Nougat +local: model_doc/oneformer + title: OneFormer +local: model_doc/owlvit + title: OWL-ViT +local: model_doc/owlv2 + title: OWLv2 +local: model_doc/paligemma + title: PaliGemma +local: model_doc/perceiver + title: Perceiver +local: model_doc/pix2struct + title: Pix2Struct +local: model_doc/sam + title: Segment Anything +local: model_doc/siglip + title: SigLIP +local: model_doc/speech-encoder-decoder + title: Speech Encoder 
Decoder Models +local: model_doc/tapas + title: TAPAS +local: model_doc/trocr + title: TrOCR +local: model_doc/tvlt + title: TVLT +local: model_doc/tvp + title: TVP +local: model_doc/udop + title: UDOP +local: model_doc/video_llava + title: VideoLlava +local: model_doc/vilt + title: ViLT +local: model_doc/vipllava + title: VipLlava +local: model_doc/vision-encoder-decoder + title: Vision Encoder Decoder Models +local: model_doc/vision-text-dual-encoder + title: Vision Text Dual Encoder +local: model_doc/visual_bert + title: VisualBERT +local: model_doc/xclip + title: X-CLIP + title: Multimodal models +isExpanded: false + sections: +local: model_doc/decision_transformer + title: Decision Transformer +local: model_doc/trajectory_transformer + title: Trajectory Transformer + title: Reinforcement learning models +isExpanded: false + sections: +local: model_doc/autoformer + title: Autoformer +local: model_doc/informer + title: Informer +local: model_doc/patchtsmixer + title: PatchTSMixer +local: model_doc/patchtst + title: PatchTST +local: model_doc/time_series_transformer + title: Time Series Transformer + title: Time series models +isExpanded: false + sections: +local: model_doc/graphormer + title: Graphormer + title: Graph models +title: Models \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/__toctree.txt_chunk_7.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/__toctree.txt_chunk_7.txt new file mode 100644 index 0000000000000000000000000000000000000000..35252b2045d5bfe855a960ff58367079d9597519 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/__toctree.txt_chunk_7.txt @@ -0,0 +1,21 @@ +sections: +local: internal/modeling_utils + title: Custom Layers and Utilities +local: internal/pipelines_utils + title: Utilities for pipelines +local: internal/tokenization_utils + title: Utilities for Tokenizers +local: internal/trainer_utils + title: Utilities for Trainer +local: internal/generation_utils + title: Utilities for Generation +local: internal/image_processing_utils + title: Utilities for Image Processors +local: internal/audio_utils + title: Utilities for Audio processing +local: internal/file_utils + title: General Utilities +local: internal/time_series_utils + title: Utilities for Time Series +title: Internal Helpers + title: API \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_accelerate.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_accelerate.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..9b6b42653c39b53712ce73d30061530f50b7af73 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_accelerate.txt_chunk_0.txt @@ -0,0 +1,10 @@ +Distributed training with 🤗 Accelerate +As models get bigger, parallelism has emerged as a strategy for training larger models on limited hardware and accelerating training speed by several orders of magnitude. At Hugging Face, we created the 🤗 Accelerate library to help users easily train a 🤗 Transformers model on any type of distributed setup, whether it is multiple GPU's on one machine or multiple GPU's across several machines. In this tutorial, learn how to customize your native PyTorch training loop to enable training in a distributed environment. +Setup +Get started by installing 🤗 Accelerate: + +pip install accelerate +Then import and create an [~accelerate.Accelerator] object. The [~accelerate.Accelerator] will automatically detect your type of distributed setup and initialize all the necessary components for training. 
You don't need to explicitly place your model on a device. + +from accelerate import Accelerator +accelerator = Accelerator() \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_accelerate.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_accelerate.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..566b9d1c222238b058f8c10128443cb29dd6c33c --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_accelerate.txt_chunk_1.txt @@ -0,0 +1,25 @@ +from accelerate import Accelerator +accelerator = Accelerator() + +Prepare to accelerate +The next step is to pass all the relevant training objects to the [~accelerate.Accelerator.prepare] method. This includes your training and evaluation DataLoaders, a model and an optimizer: + +train_dataloader, eval_dataloader, model, optimizer = accelerator.prepare( + train_dataloader, eval_dataloader, model, optimizer + ) + +Backward +The last addition is to replace the typical loss.backward() in your training loop with 🤗 Accelerate's [~accelerate.Accelerator.backward]method: + +for epoch in range(num_epochs): + for batch in train_dataloader: + outputs = model(**batch) + loss = outputs.loss + accelerator.backward(loss) + + optimizer.step() + lr_scheduler.step() + optimizer.zero_grad() + progress_bar.update(1) + +As you can see in the following code, you only need to add four additional lines of code to your training loop to enable distributed training! \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_accelerate.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_accelerate.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..bfa54163b21ff1cb1289ed523fd6448f77b87843 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_accelerate.txt_chunk_2.txt @@ -0,0 +1,32 @@ ++ from accelerate import Accelerator + from transformers import AdamW, AutoModelForSequenceClassification, get_scheduler + +accelerator = Accelerator() + +model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2) + optimizer = AdamW(model.parameters(), lr=3e-5) + +device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") + +model.to(device) + +train_dataloader, eval_dataloader, model, optimizer = accelerator.prepare( + +train_dataloader, eval_dataloader, model, optimizer +) + +num_epochs = 3 + num_training_steps = num_epochs * len(train_dataloader) + lr_scheduler = get_scheduler( + "linear", + optimizer=optimizer, + num_warmup_steps=0, + num_training_steps=num_training_steps + ) +progress_bar = tqdm(range(num_training_steps)) +model.train() + for epoch in range(num_epochs): + for batch in train_dataloader: + + outputs = model(**batch) + loss = outputs.loss \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_accelerate.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_accelerate.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..df5a7d38372480e54c7c52a79cd00cde94680f55 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_accelerate.txt_chunk_3.txt @@ -0,0 +1,25 @@ +outputs = model(**batch) + loss = outputs.loss + ++ accelerator.backward(loss) + optimizer.step() + lr_scheduler.step() + optimizer.zero_grad() + progress_bar.update(1) + +Train +Once you've added the relevant lines of code, launch your training in a script or a notebook like Colaboratory. 
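For reference, the fragments above can be assembled into a single loop. The sketch below is only a hedged consolidation of those fragments, not an official recipe: the "bert-base-uncased" checkpoint and your_tokenized_dataset are placeholders standing in for your own checkpoint and preprocessed dataset.
python
from accelerate import Accelerator
from torch.optim import AdamW
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
from transformers import AutoModelForSequenceClassification, get_scheduler

accelerator = Accelerator()  # detects the distributed setup automatically

# placeholders: swap in your own checkpoint and tokenized dataset
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
optimizer = AdamW(model.parameters(), lr=3e-5)
train_dataloader = DataLoader(your_tokenized_dataset, shuffle=True, batch_size=8)

num_epochs = 3
num_training_steps = num_epochs * len(train_dataloader)
lr_scheduler = get_scheduler(
    "linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps
)

# prepare() handles device placement, so no manual model.to(device) is needed
train_dataloader, model, optimizer = accelerator.prepare(train_dataloader, model, optimizer)

progress_bar = tqdm(range(num_training_steps))
model.train()
for epoch in range(num_epochs):
    for batch in train_dataloader:
        outputs = model(**batch)
        loss = outputs.loss
        accelerator.backward(loss)  # replaces the usual loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        progress_bar.update(1)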
+Train with a script +If you are running your training from a script, run the following command to create and save a configuration file: + +accelerate config +Then launch your training with: + +accelerate launch train.py +Train with a notebook +🤗 Accelerate can also run in a notebook if you're planning on using Colaboratory's TPUs. Wrap all the code responsible for training in a function, and pass it to [~accelerate.notebook_launcher]: + +from accelerate import notebook_launcher +notebook_launcher(training_function) + +For more information about 🤗 Accelerate and its rich features, refer to the documentation. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..8cc85a9b1886af385707cd83b37f2e01cd845658 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_0.txt @@ -0,0 +1,8 @@ +How to add a model to 🤗 Transformers? +The 🤗 Transformers library is often able to offer new models thanks to community contributors. But this can be a challenging project and requires an in-depth knowledge of the 🤗 Transformers library and the model to implement. At Hugging Face, we're trying to empower more of the community to actively add models and we've put together this guide to walk you through the process of adding a PyTorch model (make sure you have PyTorch installed). +Along the way, you'll: + +get insights into open-source best practices +understand the design principles behind one of the most popular deep learning libraries +learn how to efficiently test large models +learn how to integrate Python utilities like black, ruff, and make fix-copies to ensure clean and readable code \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..484e1231382dd428d33942196e782a49453e9a7e --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_1.txt @@ -0,0 +1,9 @@ +A Hugging Face team member will be available to help you along the way so you'll never be alone. 🤗 â¤ï¸ +To get started, open a New model addition issue for the model you want to see in 🤗 Transformers. If you're not especially picky about contributing a specific model, you can filter by the New model label to see if there are any unclaimed model requests and work on it. +Once you've opened a new model request, the first step is to get familiar with 🤗 Transformers if you aren't already! +General overview of 🤗 Transformers +First, you should get a general overview of 🤗 Transformers. 🤗 Transformers is a very opinionated library, so there is a +chance that you don't agree with some of the library's philosophies or design choices. From our experience, however, we +found that the fundamental design choices and philosophies of the library are crucial to efficiently scale 🤗 +Transformers while keeping maintenance costs at a reasonable level. +A good first starting point to better understand the library is to read the documentation of our philosophy. 
As a result of our way of working, there are some choices that we try to apply to all models: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_10.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_10.txt new file mode 100644 index 0000000000000000000000000000000000000000..933ac76a61ec85eb5027851124bd0a7b0ee8f317 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_10.txt @@ -0,0 +1,22 @@ +Fork the repository by clicking on the ‘Fork' button on the + repository's page. This creates a copy of the code under your GitHub user account. + +Clone your transformers fork to your local disk, and add the base repository as a remote: + + git clone https://github.com/[your Github handle]/transformers.git + cd transformers + git remote add upstream https://github.com/huggingface/transformers.git + +Set up a development environment, for instance by running the following command: + + python -m venv .env + source .env/bin/activate + pip install -e ".[dev]" +Depending on your OS, and since the number of optional dependencies of Transformers is growing, you might get a + failure with this command. If that's the case make sure to install the Deep Learning framework you are working with + (PyTorch, TensorFlow and/or Flax) then do: + + pip install -e ".[quality]" +which should be enough for most use cases. You can then return to the parent directory + + cd .. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_11.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_11.txt new file mode 100644 index 0000000000000000000000000000000000000000..ad0e40b009a08bb7c60dce16fdf3475f00bd32a1 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_11.txt @@ -0,0 +1,8 @@ +cd .. + +We recommend adding the PyTorch version of brand_new_bert to Transformers. To install PyTorch, please follow the + instructions on https://pytorch.org/get-started/locally/. + +Note: You don't need to have CUDA installed. Making the new model work on CPU is sufficient. + +To port brand_new_bert, you will also need access to its original repository: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_12.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_12.txt new file mode 100644 index 0000000000000000000000000000000000000000..29b2f4f10b938e7fc9b0360358d4292701c6fcff --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_12.txt @@ -0,0 +1,15 @@ +git clone https://github.com/org_that_created_brand_new_bert_org/brand_new_bert.git + cd brand_new_bert + pip install -e . +Now you have set up a development environment to port brand_new_bert to 🤗 Transformers. +3.-4. Run a pretrained checkpoint using the original repository +At first, you will work on the original brand_new_bert repository. Often, the original implementation is very +“researchyâ€. Meaning that documentation might be lacking and the code can be difficult to understand. But this should +be exactly your motivation to reimplement brand_new_bert. At Hugging Face, one of our main goals is to make people +stand on the shoulders of giants which translates here very well into taking a working model and rewriting it to make +it as accessible, user-friendly, and beautiful as possible. 
This is the number-one motivation to re-implement +models into 🤗 Transformers - trying to make complex new NLP technology accessible to everybody. +You should start thereby by diving into the original repository. +Successfully running the official pretrained model in the original repository is often the most difficult step. +From our experience, it is very important to spend some time getting familiar with the original code-base. You need to +figure out the following: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_13.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_13.txt new file mode 100644 index 0000000000000000000000000000000000000000..9f20d287766056852dfb969286983fc1c4184cc6 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_13.txt @@ -0,0 +1,10 @@ +Where to find the pretrained weights? +How to load the pretrained weights into the corresponding model? +How to run the tokenizer independently from the model? +Trace one forward pass so that you know which classes and functions are required for a simple forward pass. Usually, + you only have to reimplement those functions. +Be able to locate the important components of the model: Where is the model's class? Are there model sub-classes, + e.g. EncoderModel, DecoderModel? Where is the self-attention layer? Are there multiple different attention layers, + e.g. self-attention, cross-attention? +How can you debug the model in the original environment of the repo? Do you have to add print statements, can you + work with an interactive debugger like ipdb, or should you use an efficient IDE to debug the model, like PyCharm? \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_14.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_14.txt new file mode 100644 index 0000000000000000000000000000000000000000..7fabaa8bbc79d722e8fe859ab4cc8723e2b352da --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_14.txt @@ -0,0 +1,10 @@ +It is very important that before you start the porting process, you can efficiently debug code in the original +repository! Also, remember that you are working with an open-source library, so do not hesitate to open an issue, or +even a pull request in the original repository. The maintainers of this repository are most likely very happy about +someone looking into their code! +At this point, it is really up to you which debugging environment and strategy you prefer to use to debug the original +model. We strongly advise against setting up a costly GPU environment, but simply work on a CPU both when starting to +dive into the original repository and also when starting to write the 🤗 Transformers implementation of the model. Only +at the very end, when the model has already been successfully ported to 🤗 Transformers, one should verify that the +model also works as expected on GPU. +In general, there are two possible debugging environments for running the original model \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_15.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_15.txt new file mode 100644 index 0000000000000000000000000000000000000000..9d1e1a793c3e21b19c2462d36264d2282d6a0cc3 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_15.txt @@ -0,0 +1,2 @@ +Jupyter notebooks / google colab +Local python scripts. 
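Whichever environment you pick, the goal of tracing one forward pass is to see which submodules run and what they output. The snippet below is an illustrative sketch only (not part of the guide), assuming the original model is a plain, non-jitted torch.nn.Module called with a dict of tensors; it uses forward hooks, one generic way to record every submodule's output during a single call.
python
import torch

def trace_forward(model, model_inputs):
    """Run one forward pass and record the output of every named submodule."""
    captured = {}
    hooks = []

    def make_hook(layer_name):
        def hook(module, inputs, output):
            captured[layer_name] = output
        return hook

    for name, module in model.named_modules():
        if name:  # skip the root module itself
            hooks.append(module.register_forward_hook(make_hook(name)))

    with torch.no_grad():
        model(**model_inputs)  # model_inputs: dict of tensors, e.g. {"input_ids": ...}

    for handle in hooks:
        handle.remove()
    return captured
The captured dictionary then tells you which classes and functions are actually involved in a forward pass and gives you intermediate values to compare against later.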
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_16.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_16.txt new file mode 100644 index 0000000000000000000000000000000000000000..f8d1b23eeb1e195abdb01c059400c3c77107de05 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_16.txt @@ -0,0 +1,15 @@ +Jupyter notebooks have the advantage that they allow for cell-by-cell execution which can be helpful to better split +logical components from one another and to have faster debugging cycles as intermediate results can be stored. Also, +notebooks are often easier to share with other contributors, which might be very helpful if you want to ask the Hugging +Face team for help. If you are familiar with Jupyter notebooks, we strongly recommend you work with them. +The obvious disadvantage of Jupyter notebooks is that if you are not used to working with them you will have to spend +some time adjusting to the new programming environment and you might not be able to use your known debugging tools +anymore, like ipdb. +For each code-base, a good first step is always to load a small pretrained checkpoint and to be able to reproduce a +single forward pass using a dummy integer vector of input IDs as an input. Such a script could look like this (in +pseudocode): +python +model = BrandNewBertModel.load_pretrained_checkpoint("/path/to/checkpoint/") +input_ids = [0, 4, 5, 2, 3, 7, 9] # vector of input ids +original_output = model.predict(input_ids) +Next, regarding the debugging strategy, there are generally a few from which to choose from: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_17.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_17.txt new file mode 100644 index 0000000000000000000000000000000000000000..745098f22204ea5eb682d58e4b6a443fc50b063e --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_17.txt @@ -0,0 +1,10 @@ +Decompose the original model into many small testable components and run a forward pass on each of those for + verification +Decompose the original model only into the original tokenizer and the original model, run a forward pass on + those, and use intermediate print statements or breakpoints for verification + +Again, it is up to you which strategy to choose. Often, one or the other is advantageous depending on the original code +base. +If the original code-base allows you to decompose the model into smaller sub-components, e.g. if the original +code-base can easily be run in eager mode, it is usually worth the effort to do so. 
There are some important advantages +to taking the more difficult road in the beginning: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_18.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_18.txt new file mode 100644 index 0000000000000000000000000000000000000000..48a37ccdfaaae0c0cf4a79270e576753cccdb0cc --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_18.txt @@ -0,0 +1,9 @@ +at a later stage when comparing the original model to the Hugging Face implementation, you can verify automatically + for each component individually that the corresponding component of the 🤗 Transformers implementation matches instead + of relying on visual comparison via print statements +it can give you some rope to decompose the big problem of porting a model into smaller problems of just porting + individual components and thus structure your work better +separating the model into logical meaningful components will help you to get a better overview of the model's design + and thus to better understand the model +at a later stage those component-by-component tests help you to ensure that no regression occurs as you continue + changing your code \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_19.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_19.txt new file mode 100644 index 0000000000000000000000000000000000000000..1e5d1418d0d9f6cbc8d0436a61408643f7e0808e --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_19.txt @@ -0,0 +1,11 @@ +Lysandre's integration checks for ELECTRA +gives a nice example of how this can be done. +However, if the original code-base is very complex or only allows intermediate components to be run in a compiled mode, +it might be too time-consuming or even impossible to separate the model into smaller testable sub-components. A good +example is T5's MeshTensorFlow library which is +very complex and does not offer a simple way to decompose the model into its sub-components. For such libraries, one +often relies on verifying print statements. +No matter which strategy you choose, the recommended procedure is often the same that you should start to debug the +starting layers first and the ending layers last. +It is recommended that you retrieve the output, either by print statements or sub-component functions, of the following +layers in the following order: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..a95879fc026b8b3f0f40630a8b0070eb76b57449 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_2.txt @@ -0,0 +1,4 @@ +Composition is generally favored over-abstraction +Duplicating code is not always bad if it strongly improves the readability or accessibility of a model +Model files are as self-contained as possible so that when you read the code of a specific model, you ideally only + have to look into the respective modeling_.py file. 
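To make the component-by-component verification described above concrete, here is a small hedged sketch (the helper name is illustrative and not part of the guide) that compares one original sub-component against its ported counterpart on the same dummy input, assuming both are PyTorch modules returning a single tensor; the tolerance mirrors the 1e-3 used for the final integration test.
python
import torch

def check_component(original_module, ported_module, dummy_input, name, atol=1e-3):
    """Assert that two implementations of the same sub-component agree on one input."""
    original_module.eval()
    ported_module.eval()
    with torch.no_grad():
        original_out = original_module(dummy_input)
        ported_out = ported_module(dummy_input)
    max_diff = (original_out - ported_out).abs().max().item()
    assert torch.allclose(original_out, ported_out, atol=atol), (
        f"{name}: outputs differ, max absolute difference {max_diff:.2e}"
    )
    print(f"{name}: OK, max absolute difference {max_diff:.2e}")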
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_20.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_20.txt new file mode 100644 index 0000000000000000000000000000000000000000..e247cdf911320cd7966f9bcc2d1e551ae439b77a --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_20.txt @@ -0,0 +1,6 @@ +Retrieve the input IDs passed to the model +Retrieve the word embeddings +Retrieve the input of the first Transformer layer +Retrieve the output of the first Transformer layer +Retrieve the output of the following n - 1 Transformer layers +Retrieve the output of the whole BrandNewBert Model \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_21.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_21.txt new file mode 100644 index 0000000000000000000000000000000000000000..6b77bd4cdc24e12f013eba4b5c9b3f3be639013e --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_21.txt @@ -0,0 +1,18 @@ +Input IDs should thereby consists of an array of integers, e.g. input_ids = [0, 4, 4, 3, 2, 4, 1, 7, 19] +The outputs of the following layers often consist of multi-dimensional float arrays and can look like this: +[[ + [-0.1465, -0.6501, 0.1993, , 0.1451, 0.3430, 0.6024], + [-0.4417, -0.5920, 0.3450, , -0.3062, 0.6182, 0.7132], + [-0.5009, -0.7122, 0.4548, , -0.3662, 0.6091, 0.7648], + , + [-0.5613, -0.6332, 0.4324, , -0.3792, 0.7372, 0.9288], + [-0.5416, -0.6345, 0.4180, , -0.3564, 0.6992, 0.9191], + [-0.5334, -0.6403, 0.4271, , -0.3339, 0.6533, 0.8694]]], +We expect that every model added to 🤗 Transformers passes a couple of integration tests, meaning that the original +model and the reimplemented version in 🤗 Transformers have to give the exact same output up to a precision of 0.001! +Since it is normal that the exact same model written in different libraries can give a slightly different output +depending on the library framework, we accept an error tolerance of 1e-3 (0.001). It is not enough if the model gives +nearly the same output, they have to be almost identical. Therefore, you will certainly compare the intermediate +outputs of the 🤗 Transformers version multiple times against the intermediate outputs of the original implementation of +brand_new_bert in which case an efficient debugging environment of the original repository is absolutely +important. Here is some advice to make your debugging environment as efficient as possible. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_22.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_22.txt new file mode 100644 index 0000000000000000000000000000000000000000..87115ff8fda47a77407d2f8e60e519ed9958896e --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_22.txt @@ -0,0 +1,23 @@ +Find the best way of debugging intermediate results. Is the original repository written in PyTorch? Then you should + probably take the time to write a longer script that decomposes the original model into smaller sub-components to + retrieve intermediate values. Is the original repository written in Tensorflow 1? Then you might have to rely on + TensorFlow print operations like tf.print to output + intermediate values. Is the original repository written in Jax? Then make sure that the model is not jitted when + running the forward pass, e.g. check-out this link. 
+Use the smallest pretrained checkpoint you can find. The smaller the checkpoint, the faster your debug cycle + becomes. It is not efficient if your pretrained model is so big that your forward pass takes more than 10 seconds. + In case only very large checkpoints are available, it might make more sense to create a dummy model in the new + environment with randomly initialized weights and save those weights for comparison with the 🤗 Transformers version + of your model +Make sure you are using the easiest way of calling a forward pass in the original repository. Ideally, you want to + find the function in the original repository that only calls a single forward pass, i.e. that is often called + predict, evaluate, forward or __call__. You don't want to debug a function that calls forward + multiple times, e.g. to generate text, like autoregressive_sample, generate. +Try to separate the tokenization from the model's forward pass. If the original repository shows examples where + you have to input a string, then try to find out where in the forward call the string input is changed to input ids + and start from this point. This might mean that you have to possibly write a small script yourself or change the + original code so that you can directly input the ids instead of an input string. +Make sure that the model in your debugging setup is not in training mode, which often causes the model to yield + random outputs due to multiple dropout layers in the model. Make sure that the forward pass in your debugging + environment is deterministic so that the dropout layers are not used. Or use transformers.utils.set_seed + if the old and new implementations are in the same framework. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_23.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_23.txt new file mode 100644 index 0000000000000000000000000000000000000000..a8e7e44cc1c452dc16a29212b8b6573630e2e637 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_23.txt @@ -0,0 +1,10 @@ +The following section gives you more specific details/tips on how you can do this for brand_new_bert. +5.-14. Port BrandNewBert to 🤗 Transformers +Next, you can finally start adding new code to 🤗 Transformers. Go into the clone of your 🤗 Transformers' fork: + +cd transformers +In the special case that you are adding a model whose architecture exactly matches the model architecture of an +existing model you only have to add a conversion script as described in this section. +In this case, you can just re-use the whole model architecture of the already existing model. +Otherwise, let's start generating a new model. We recommend using the following script to add a model starting from +an existing model: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_24.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_24.txt new file mode 100644 index 0000000000000000000000000000000000000000..23a4f37cc7235b192dd8fed2bae50f5b662bb0aa --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_24.txt @@ -0,0 +1,25 @@ +transformers-cli add-new-model-like +You will be prompted with a questionnaire to fill in the basic information of your model. +Open a Pull Request on the main huggingface/transformers repo +Before starting to adapt the automatically generated code, now is the time to open a “Work in progress (WIP)†pull +request, e.g. 
“[WIP] Add brand_new_bertâ€, in 🤗 Transformers so that you and the Hugging Face team can work +side-by-side on integrating the model into 🤗 Transformers. +You should do the following: + +Create a branch with a descriptive name from your main branch + + git checkout -b add_brand_new_bert + +Commit the automatically generated code: + + git add . + git commit + +Fetch and rebase to current main + + git fetch upstream + git rebase upstream/main + +Push the changes to your account using: + + git push -u origin a-descriptive-name-for-my-changes \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_25.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_25.txt new file mode 100644 index 0000000000000000000000000000000000000000..b818c1ce7e642f448436a0351cbb26884c7e07f0 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_25.txt @@ -0,0 +1,13 @@ +Push the changes to your account using: + + git push -u origin a-descriptive-name-for-my-changes + +Once you are satisfied, go to the webpage of your fork on GitHub. Click on “Pull requestâ€. Make sure to add the + GitHub handle of some members of the Hugging Face team as reviewers, so that the Hugging Face team gets notified for + future changes. + +Change the PR into a draft by clicking on “Convert to draft†on the right of the GitHub pull request web page. + +In the following, whenever you have made some progress, don't forget to commit your work and push it to your account so +that it shows in the pull request. Additionally, you should make sure to update your work with the current main from +time to time by doing: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_26.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_26.txt new file mode 100644 index 0000000000000000000000000000000000000000..f9e07b2ba7566c7579c52390f1f01b011177f17e --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_26.txt @@ -0,0 +1,33 @@ +git fetch upstream +git merge upstream/main +In general, all questions you might have regarding the model or your implementation should be asked in your PR and +discussed/solved in the PR. This way, the Hugging Face team will always be notified when you are committing new code or +if you have a question. It is often very helpful to point the Hugging Face team to your added code so that the Hugging +Face team can efficiently understand your problem or question. +To do so, you can go to the “Files changed†tab where you see all of your changes, go to a line regarding which you +want to ask a question, and click on the “+†symbol to add a comment. Whenever a question or problem has been solved, +you can click on the “Resolve†button of the created comment. +In the same way, the Hugging Face team will open comments when reviewing your code. We recommend asking most questions +on GitHub on your PR. For some very general questions that are not very useful for the public, feel free to ping the +Hugging Face team by Slack or email. +5. Adapt the generated models code for brand_new_bert +At first, we will focus only on the model itself and not care about the tokenizer. All the relevant code should be +found in the generated files src/transformers/models/brand_new_bert/modeling_brand_new_bert.py and +src/transformers/models/brand_new_bert/configuration_brand_new_bert.py. +Now you can finally start coding :). 
The generated code in +src/transformers/models/brand_new_bert/modeling_brand_new_bert.py will either have the same architecture as BERT if +it's an encoder-only model or BART if it's an encoder-decoder model. At this point, you should remind yourself what +you've learned in the beginning about the theoretical aspects of the model: How is the model different from BERT or +BART?". Implement those changes which often means changing the self-attention layer, the order of the normalization +layer, etc… Again, it is often useful to look at the similar architecture of already existing models in Transformers to +get a better feeling of how your model should be implemented. +Note that at this point, you don't have to be very sure that your code is fully correct or clean. Rather, it is +advised to add a first unclean, copy-pasted version of the original code to +src/transformers/models/brand_new_bert/modeling_brand_new_bert.py until you feel like all the necessary code is +added. From our experience, it is much more efficient to quickly add a first version of the required code and +improve/correct the code iteratively with the conversion script as described in the next section. The only thing that +has to work at this point is that you can instantiate the 🤗 Transformers implementation of brand_new_bert, i.e. the +following command should work: +thon +from transformers import BrandNewBertModel, BrandNewBertConfig +model = BrandNewBertModel(BrandNewBertConfig()) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_27.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_27.txt new file mode 100644 index 0000000000000000000000000000000000000000..5f48ff253e575d5bc4a801e1508ea063bc784ad1 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_27.txt @@ -0,0 +1,45 @@ +The above command will create a model according to the default parameters as defined in BrandNewBertConfig() with +random weights, thus making sure that the init() methods of all components works. +Note that all random initialization should happen in the _init_weights method of your BrandnewBertPreTrainedModel +class. It should initialize all leaf modules depending on the variables of the config. Here is an example with the +BERT _init_weights method: +py +def _init_weights(self, module): + """Initialize the weights""" + if isinstance(module, nn.Linear): + module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) + if module.bias is not None: + module.bias.data.zero_() + elif isinstance(module, nn.Embedding): + module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) + if module.padding_idx is not None: + module.weight.data[module.padding_idx].zero_() + elif isinstance(module, nn.LayerNorm): + module.bias.data.zero_() + module.weight.data.fill_(1.0) +You can have some more custom schemes if you need a special initialization for some modules. For instance, in +Wav2Vec2ForPreTraining, the last two linear layers need to have the initialization of the regular PyTorch nn.Linear +but all the other ones should use an initialization as above. 
This is coded like this: +py +def _init_weights(self, module): + """Initialize the weights""" + if isinstance(module, Wav2Vec2ForPreTraining): + module.project_hid.reset_parameters() + module.project_q.reset_parameters() + module.project_hid._is_hf_initialized = True + module.project_q._is_hf_initialized = True + elif isinstance(module, nn.Linear): + module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) + if module.bias is not None: + module.bias.data.zero_() +The _is_hf_initialized flag is internally used to make sure we only initialize a submodule once. By setting it to +True for module.project_q and module.project_hid, we make sure the custom initialization we did is not overridden later on, +the _init_weights function won't be applied to them. +6. Write a conversion script +Next, you should write a conversion script that lets you convert the checkpoint you used to debug brand_new_bert in +the original repository to a checkpoint compatible with your just created 🤗 Transformers implementation of +brand_new_bert. It is not advised to write the conversion script from scratch, but rather to look through already +existing conversion scripts in 🤗 Transformers for one that has been used to convert a similar model that was written in +the same framework as brand_new_bert. Usually, it is enough to copy an already existing conversion script and +slightly adapt it for your use case. Don't hesitate to ask the Hugging Face team to point you to a similar already +existing conversion script for your model. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_28.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_28.txt new file mode 100644 index 0000000000000000000000000000000000000000..0a27078e43cf8c6af55b47adcfd5564e1f2f1c26 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_28.txt @@ -0,0 +1,20 @@ +If you are porting a model from TensorFlow to PyTorch, a good starting point might be BERT's conversion script here +If you are porting a model from PyTorch to PyTorch, a good starting point might be BART's conversion script here + +In the following, we'll quickly explain how PyTorch models store layer weights and define layer names. In PyTorch, the +name of a layer is defined by the name of the class attribute you give the layer. Let's define a dummy model in +PyTorch, called SimpleModel as follows: +thon +from torch import nn +class SimpleModel(nn.Module): + def init(self): + super().init() + self.dense = nn.Linear(10, 10) + self.intermediate = nn.Linear(10, 10) + self.layer_norm = nn.LayerNorm(10) + +Now we can create an instance of this model definition which will fill all weights: dense, intermediate, +layer_norm with random weights. 
We can print the model to see its architecture +thon +model = SimpleModel() +print(model) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_29.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_29.txt new file mode 100644 index 0000000000000000000000000000000000000000..8de4ff60acd06e61f39ea95a022d4e93648011bf --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_29.txt @@ -0,0 +1,40 @@ +This will print out the following: +SimpleModel( + (dense): Linear(in_features=10, out_features=10, bias=True) + (intermediate): Linear(in_features=10, out_features=10, bias=True) + (layer_norm): LayerNorm((10,), eps=1e-05, elementwise_affine=True) +) +We can see that the layer names are defined by the name of the class attribute in PyTorch. You can print out the weight +values of a specific layer: +python +print(model.dense.weight.data) +to see that the weights were randomly initialized +tensor([[-0.0818, 0.2207, -0.0749, -0.0030, 0.0045, -0.1569, -0.1598, 0.0212, + -0.2077, 0.2157], + [ 0.1044, 0.0201, 0.0990, 0.2482, 0.3116, 0.2509, 0.2866, -0.2190, + 0.2166, -0.0212], + [-0.2000, 0.1107, -0.1999, -0.3119, 0.1559, 0.0993, 0.1776, -0.1950, + -0.1023, -0.0447], + [-0.0888, -0.1092, 0.2281, 0.0336, 0.1817, -0.0115, 0.2096, 0.1415, + -0.1876, -0.2467], + [ 0.2208, -0.2352, -0.1426, -0.2636, -0.2889, -0.2061, -0.2849, -0.0465, + 0.2577, 0.0402], + [ 0.1502, 0.2465, 0.2566, 0.0693, 0.2352, -0.0530, 0.1859, -0.0604, + 0.2132, 0.1680], + [ 0.1733, -0.2407, -0.1721, 0.1484, 0.0358, -0.0633, -0.0721, -0.0090, + 0.2707, -0.2509], + [-0.1173, 0.1561, 0.2945, 0.0595, -0.1996, 0.2988, -0.0802, 0.0407, + 0.1829, -0.1568], + [-0.1164, -0.2228, -0.0403, 0.0428, 0.1339, 0.0047, 0.1967, 0.2923, + 0.0333, -0.0536], + [-0.1492, -0.1616, 0.1057, 0.1950, -0.2807, -0.2710, -0.1586, 0.0739, + 0.2220, 0.2358]]). +In the conversion script, you should fill those randomly initialized weights with the exact weights of the +corresponding layer in the checkpoint. E.g. +thon +retrieve matching layer weights, e.g. by +recursive algorithm +layer_name = "dense" +pretrained_weight = array_of_dense_layer +model_pointer = getattr(model, "dense") +model_pointer.weight.data = torch.from_numpy(pretrained_weight) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..f722b1a916b0963330511ac7ed47bde54f497e1e --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_3.txt @@ -0,0 +1,9 @@ +In our opinion, the library's code is not just a means to provide a product, e.g. the ability to use BERT for +inference, but also as the very product that we want to improve. Hence, when adding a model, the user is not only the +person who will use your model, but also everybody who will read, try to understand, and possibly tweak your code. +With this in mind, let's go a bit deeper into the general library design. +Overview of models +To successfully add a model, it is important to understand the interaction between your model and its config, +[PreTrainedModel], and [PretrainedConfig]. For exemplary purposes, we will +call the model to be added to 🤗 Transformers BrandNewBert. 
+Let's take a look: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_30.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_30.txt new file mode 100644 index 0000000000000000000000000000000000000000..74b59316bfe03cdf94f26611a88794afd8719e5c --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_30.txt @@ -0,0 +1,47 @@ +While doing so, you must verify that each randomly initialized weight of your PyTorch model and its corresponding +pretrained checkpoint weight exactly match in both shape and name. To do so, it is necessary to add assert +statements for the shape and print out the names of the checkpoints weights. E.g. you should add statements like: +python +assert ( + model_pointer.weight.shape == pretrained_weight.shape +), f"Pointer shape of random weight {model_pointer.shape} and array shape of checkpoint weight {pretrained_weight.shape} mismatched" +Besides, you should also print out the names of both weights to make sure they match, e.g. +python +logger.info(f"Initialize PyTorch weight {layer_name} from {pretrained_weight.name}") +If either the shape or the name doesn't match, you probably assigned the wrong checkpoint weight to a randomly +initialized layer of the 🤗 Transformers implementation. +An incorrect shape is most likely due to an incorrect setting of the config parameters in BrandNewBertConfig() that +do not exactly match those that were used for the checkpoint you want to convert. However, it could also be that +PyTorch's implementation of a layer requires the weight to be transposed beforehand. +Finally, you should also check that all required weights are initialized and print out all checkpoint weights that +were not used for initialization to make sure the model is correctly converted. It is completely normal, that the +conversion trials fail with either a wrong shape statement or a wrong name assignment. This is most likely because either +you used incorrect parameters in BrandNewBertConfig(), have a wrong architecture in the 🤗 Transformers +implementation, you have a bug in the init() functions of one of the components of the 🤗 Transformers +implementation or you need to transpose one of the checkpoint weights. +This step should be iterated with the previous step until all weights of the checkpoint are correctly loaded in the +Transformers model. Having correctly loaded the checkpoint into the 🤗 Transformers implementation, you can then save +the model under a folder of your choice /path/to/converted/checkpoint/folder that should then contain both a +pytorch_model.bin file and a config.json file: +python +model.save_pretrained("/path/to/converted/checkpoint/folder") +7. Implement the forward pass +Having managed to correctly load the pretrained weights into the 🤗 Transformers implementation, you should now make +sure that the forward pass is correctly implemented. In Get familiar with the original repository, you have already created a script that runs a forward +pass of the model using the original repository. Now you should write an analogous script using the 🤗 Transformers +implementation instead of the original one. 
It should look as follows: +python +import torch +model = BrandNewBertModel.from_pretrained("/path/to/converted/checkpoint/folder") +input_ids = torch.tensor([[0, 4, 4, 3, 2, 4, 1, 7, 19]]) +output = model(input_ids).last_hidden_state +It is very likely that the 🤗 Transformers implementation and the original model implementation don't give the exact +same output the very first time or that the forward pass throws an error. Don't be disappointed - it's expected! First, +you should make sure that the forward pass doesn't throw any errors. It often happens that the wrong dimensions are +used, leading to a Dimensionality mismatch error, or that the wrong data type is used, e.g. torch.long +instead of torch.float32. Don't hesitate to ask the Hugging Face team for help if you don't manage to solve +certain errors. +The final part to make sure the 🤗 Transformers implementation works correctly is to ensure that the outputs are +equivalent to a precision of 1e-3. First, you should ensure that the output shapes are identical, i.e. +outputs.shape should yield the same value for the script of the 🤗 Transformers implementation and the original +implementation. Next, you should make sure that the output values are identical as well. This is one of the most difficult +parts of adding a new model. Common reasons why the outputs are not identical are: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_31.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_31.txt new file mode 100644 index 0000000000000000000000000000000000000000..b27d65bbcf76b98951f9deee7af51e7517828688 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_31.txt @@ -0,0 +1,5 @@ +Some layers were not added, e.g. an activation layer was not added, or the residual connection was forgotten +The word embedding matrix was not tied +The wrong positional embeddings are used because the original implementation uses an offset +Dropout is applied during the forward pass. To fix this, make sure model.training is False and that no dropout + layer is falsely activated during the forward pass, i.e. pass self.training to PyTorch's functional dropout \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_32.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_32.txt new file mode 100644 index 0000000000000000000000000000000000000000..27be1a13c321d7e5cbec242fedba30ca984d7e9f --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_32.txt @@ -0,0 +1,19 @@ +The best way to fix the problem is usually to look at the forward pass of the original implementation and the 🤗 +Transformers implementation side-by-side and check if there are any differences. Ideally, you should debug/print out +intermediate outputs of both implementations of the forward pass to find the exact position in the network where the 🤗 +Transformers implementation shows a different output than the original implementation. First, make sure that the +hard-coded input_ids in both scripts are identical. Next, verify that the outputs of the first transformation of +the input_ids (usually the word embeddings) are identical. And then work your way up to the very last layer of the +network. At some point, you will notice a difference between the two implementations, which should point you to the bug +in the 🤗 Transformers implementation.
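To make this layer-by-layer comparison concrete, here is a loose, hypothetical sketch; the activation dictionaries and the dummy values are placeholders for whatever you capture with forward hooks or temporary print/return statements in the two implementations, and none of this is code from the library:
python
import torch

# Assume `original_acts` and `hf_acts` map layer names to activations captured from the
# original implementation and the 🤗 Transformers implementation on the same input_ids.
# Dummy tensors stand in for the real captured values.
original_acts = {"embeddings": torch.ones(1, 9, 16), "layer_0": torch.ones(1, 9, 16)}
hf_acts = {"embeddings": torch.ones(1, 9, 16), "layer_0": torch.ones(1, 9, 16) + 0.1}

for name, reference in original_acts.items():
    matches = torch.allclose(reference, hf_acts[name], atol=1e-3)
    print(f"{name}: {'match' if matches else 'MISMATCH'}")
    if not matches:
        # The first mismatching layer is where to start looking for the bug.
        break
Working upwards from the embeddings, the first mismatch localizes the bug to a single block.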
From our experience, a simple and efficient way is to add many print statements +in both the original implementation and the 🤗 Transformers implementation, at the same positions in the network, +and to successively remove print statements showing the same values for intermediate representations. +When you're confident that both implementations yield the same output, verify the outputs with +torch.allclose(original_output, output, atol=1e-3); once that check passes, you're done with the most difficult part! Congratulations - the +work left to be done should be a cakewalk 😊. +8. Adding all necessary model tests +At this point, you have successfully added a new model. However, it is quite possible that the model does not yet +fully comply with the required design. To make sure the implementation is fully compatible with 🤗 Transformers, all +common tests should pass. The Cookiecutter should have automatically added a test file for your model, probably under +tests/models/brand_new_bert/test_modeling_brand_new_bert.py. Run this test file to verify that all common +tests pass: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_33.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_33.txt new file mode 100644 index 0000000000000000000000000000000000000000..602b70fd3150d1a13a43f594b58190aa4a5c2d57 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_33.txt @@ -0,0 +1,14 @@ +pytest tests/models/brand_new_bert/test_modeling_brand_new_bert.py +Having fixed all common tests, it is now crucial to ensure that all the nice work you have done is well tested, so that + +a) The community can easily understand your work by looking at specific tests of brand_new_bert +b) Future changes to your model will not break any important feature of the model. + +At first, integration tests should be added. Those integration tests essentially do the same as the debugging scripts +you used earlier to implement the model in 🤗 Transformers. A template of those model tests has already been added by the +Cookiecutter, called BrandNewBertModelIntegrationTests, and only has to be filled out by you. To ensure that those +tests are passing, run + +RUN_SLOW=1 pytest -sv tests/models/brand_new_bert/test_modeling_brand_new_bert.py::BrandNewBertModelIntegrationTests + +In case you are using Windows, you should replace RUN_SLOW=1 with SET RUN_SLOW=1 \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_34.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_34.txt new file mode 100644 index 0000000000000000000000000000000000000000..c763562302db4aed0a6914d55f94b5904192709f --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_34.txt @@ -0,0 +1,9 @@ +In case you are using Windows, you should replace RUN_SLOW=1 with SET RUN_SLOW=1 + +Second, all features that are special to brand_new_bert should be tested additionally in a separate test under +BrandNewBertModelTester/BrandNewBertModelTest. This part is often forgotten but is extremely useful in two +ways: + +It helps to transfer the knowledge you have acquired during the model addition to the community by showing how the + special features of brand_new_bert should work. +Future contributors can quickly test changes to the model by running those special tests.
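For orientation only, a hard-coded integration-style test could be structured roughly as follows; the class name, checkpoint name, and expected values below are hypothetical placeholders rather than the actual Cookiecutter template:
python
import unittest
import torch
from transformers import BrandNewBertModel  # placeholder name used throughout this guide

class BrandNewBertModelIntegrationTests(unittest.TestCase):
    def test_inference_no_head(self):
        model = BrandNewBertModel.from_pretrained("author/brand_new_bert")  # placeholder checkpoint
        model.eval()
        input_ids = torch.tensor([[0, 4, 4, 3, 2, 4, 1, 7, 19]])
        with torch.no_grad():
            output = model(input_ids).last_hidden_state
        # Expected slice recorded once from the verified original implementation (dummy values here).
        expected_slice = torch.tensor([[-0.05, 0.11, 0.29], [0.02, -0.07, 0.18], [0.13, 0.01, -0.21]])
        self.assertEqual(output.shape[:2], (1, 9))
        self.assertTrue(torch.allclose(output[0, :3, :3], expected_slice, atol=1e-4))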
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_35.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_35.txt new file mode 100644 index 0000000000000000000000000000000000000000..49a2ec48f4969cd7df85efede4346b0db135d271 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_35.txt @@ -0,0 +1,20 @@ +9. Implement the tokenizer +Next, we should add the tokenizer of brand_new_bert. Usually, the tokenizer is equivalent to or very similar to an +already existing tokenizer of 🤗 Transformers. +It is very important to find/extract the original tokenizer file and to manage to load this file into the 🤗 +Transformers' implementation of the tokenizer. +To ensure that the tokenizer works correctly, it is recommended to first create a script in the original repository +that inputs a string and returns the input_ids. It could look similar to this (in pseudo-code): +python +input_str = "This is a long example input string containing special characters .$?-, numbers 2872 234 12 and words." +model = BrandNewBertModel.load_pretrained_checkpoint("/path/to/checkpoint/") +input_ids = model.tokenize(input_str) +You might have to take a deeper look again into the original repository to find the correct tokenizer function or you +might even have to do changes to your clone of the original repository to only output the input_ids. Having written +a functional tokenization script that uses the original repository, an analogous script for 🤗 Transformers should be +created. It should look similar to this: +thon +from transformers import BrandNewBertTokenizer +input_str = "This is a long example input string containing special characters .$?-, numbers 2872 234 12 and words." +tokenizer = BrandNewBertTokenizer.from_pretrained("/path/to/tokenizer/folder/") +input_ids = tokenizer(input_str).input_ids \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_36.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_36.txt new file mode 100644 index 0000000000000000000000000000000000000000..e19a4dc3b405a4ca18e271b6e4cc916b291d1d71 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_36.txt @@ -0,0 +1,28 @@ +When both input_ids yield the same values, as a final step a tokenizer test file should also be added. +Analogous to the modeling test files of brand_new_bert, the tokenization test files of brand_new_bert should +contain a couple of hard-coded integration tests. +10. Run End-to-end integration tests +Having added the tokenizer, you should also add a couple of end-to-end integration tests using both the model and the +tokenizer to tests/models/brand_new_bert/test_modeling_brand_new_bert.py in 🤗 Transformers. +Such a test should show on a meaningful +text-to-text sample that the 🤗 Transformers implementation works as expected. A meaningful text-to-text sample can +include e.g. a source-to-target-translation pair, an article-to-summary pair, a question-to-answer pair, etc… If none +of the ported checkpoints has been fine-tuned on a downstream task it is enough to simply rely on the model tests. In a +final step to ensure that the model is fully functional, it is advised that you also run all tests on GPU. It can +happen that you forgot to add some .to(self.device) statements to internal tensors of the model, which in such a +test would show in an error. 
In case you have no access to a GPU, the Hugging Face team can take care of running those +tests for you. +11. Add Docstring +Now, all the necessary functionality for brand_new_bert is added - you're almost done! The only thing left to add is +a nice docstring and a doc page. The Cookiecutter should have added a template file called +docs/source/model_doc/brand_new_bert.md that you should fill out. Users of your model will usually first look at +this page before using your model. Hence, the documentation must be understandable and concise. It is very useful for +the community to add some Tips to show how the model should be used. Don't hesitate to ping the Hugging Face team +regarding the docstrings. +Next, make sure that the docstring added to src/transformers/models/brand_new_bert/modeling_brand_new_bert.py is +correct and included all necessary inputs and outputs. We have a detailed guide about writing documentation and our docstring format here. It is always good to remind oneself that documentation should +be treated at least as carefully as the code in 🤗 Transformers since the documentation is usually the first contact +point of the community with the model. +Code refactor +Great, now you have added all the necessary code for brand_new_bert. At this point, you should correct some potential +incorrect code style by running: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_37.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_37.txt new file mode 100644 index 0000000000000000000000000000000000000000..36aaca74c9a9d4fb82967a311e0a3d8456afac41 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_37.txt @@ -0,0 +1,2 @@ +make style +and verify that your coding style passes the quality check: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_38.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_38.txt new file mode 100644 index 0000000000000000000000000000000000000000..4012228a96c8f3ac4c88879dfca41308d80fc529 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_38.txt @@ -0,0 +1,16 @@ +make quality +There are a couple of other very strict design tests in 🤗 Transformers that might still be failing, which shows up in +the tests of your pull request. This is often because of some missing information in the docstring or some incorrect +naming. The Hugging Face team will surely help you if you're stuck here. +Lastly, it is always a good idea to refactor one's code after having ensured that the code works correctly. With all +tests passing, now it's a good time to go over the added code again and do some refactoring. +You have now finished the coding part, congratulation! 🎉 You are Awesome! 😎 +12. Upload the models to the model hub +In this final part, you should convert and upload all checkpoints to the model hub and add a model card for each +uploaded model checkpoint. You can get familiar with the hub functionalities by reading our Model sharing and uploading Page. You should work alongside the Hugging Face team here to decide on a fitting name for each +checkpoint and to get the required access rights to be able to upload the model under the author's organization of +brand_new_bert. The push_to_hub method, present in all models in transformers, is a quick and efficient way to push your checkpoint to the hub. 
A little snippet is pasted below: +thon +brand_new_bert.push_to_hub("brand_new_bert") +Uncomment the following line to push to an organization. +brand_new_bert.push_to_hub("/brand_new_bert") \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_39.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_39.txt new file mode 100644 index 0000000000000000000000000000000000000000..a2ad34860e79565120fc7a3849922f7ada8b1ee0 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_39.txt @@ -0,0 +1,18 @@ +It is worth spending some time to create fitting model cards for each checkpoint. The model cards should highlight the +specific characteristics of this particular checkpoint, e.g. On which dataset was the checkpoint +pretrained/fine-tuned on? On what down-stream task should the model be used? And also include some code on how to +correctly use the model. +13. (Optional) Add notebook +It is very helpful to add a notebook that showcases in-detail how brand_new_bert can be used for inference and/or +fine-tuned on a downstream task. This is not mandatory to merge your PR, but very useful for the community. +14. Submit your finished PR +You're done programming now and can move to the last step, which is getting your PR merged into main. Usually, the +Hugging Face team should have helped you already at this point, but it is worth taking some time to give your finished +PR a nice description and eventually add comments to your code, if you want to point out certain design choices to your +reviewer. +Share your work!! +Now, it's time to get some credit from the community for your work! Having completed a model addition is a major +contribution to Transformers and the whole NLP community. Your code and the ported pre-trained models will certainly be +used by hundreds and possibly even thousands of developers and researchers. You should be proud of your work and share +your achievements with the community. +You have made another model that is super easy to access for everyone in the community! 🤯 \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..afa322fb2194a9be0f00d78062a08488a7466834 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_4.txt @@ -0,0 +1,25 @@ +As you can see, we do make use of inheritance in 🤗 Transformers, but we keep the level of abstraction to an absolute +minimum. There are never more than two levels of abstraction for any model in the library. BrandNewBertModel +inherits from BrandNewBertPreTrainedModel which in turn inherits from [PreTrainedModel] and +that's it. As a general rule, we want to make sure that a new model only depends on +[PreTrainedModel]. The important functionalities that are automatically provided to every new +model are [~PreTrainedModel.from_pretrained] and +[~PreTrainedModel.save_pretrained], which are used for serialization and deserialization. All of the +other important functionalities, such as BrandNewBertModel.forward should be completely defined in the new +modeling_brand_new_bert.py script. Next, we want to make sure that a model with a specific head layer, such as +BrandNewBertForMaskedLM does not inherit from BrandNewBertModel, but rather uses BrandNewBertModel +as a component that can be called in its forward pass to keep the level of abstraction low. 
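As a simplified sketch of this composition pattern, using the guide's placeholder class names (real modeling code returns proper output objects and handles more arguments):
python
from torch import nn

class BrandNewBertForMaskedLM(BrandNewBertPreTrainedModel):  # placeholder classes from this guide
    def __init__(self, config):
        super().__init__(config)
        # The base model is held as a component, not inherited from.
        self.brand_new_bert = BrandNewBertModel(config)
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)

    def forward(self, input_ids, attention_mask=None):
        hidden_states = self.brand_new_bert(input_ids, attention_mask=attention_mask).last_hidden_state
        return self.lm_head(hidden_states)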
Every new model requires a +configuration class, called BrandNewBertConfig. This configuration is always stored as an attribute in +[PreTrainedModel], and thus can be accessed via the config attribute for all classes +inheriting from BrandNewBertPreTrainedModel: +python +model = BrandNewBertModel.from_pretrained("brandy/brand_new_bert") +model.config # model has access to its config +Similar to the model, the configuration inherits basic serialization and deserialization functionalities from +[PretrainedConfig]. Note that the configuration and the model are always serialized into two +different formats - the model to a pytorch_model.bin file and the configuration to a config.json file. Calling +the model's [~PreTrainedModel.save_pretrained] will automatically call +the config's [~PretrainedConfig.save_pretrained], so that both model and configuration are saved. +Code style +When coding your new model, keep in mind that Transformers is an opinionated library and we have a few quirks of our +own regarding how code should be written :-) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..bbb8f39667c465de9c86f84ca94c2b48bdafeb91 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_5.txt @@ -0,0 +1,12 @@ +The forward pass of your model should be fully written in the modeling file while being fully independent of other + models in the library. If you want to reuse a block from another model, copy the code and paste it with a + # Copied from comment on top (see here + for a good example and there for more documentation on Copied from). +The code should be fully understandable, even by a non-native English speaker. This means you should pick + descriptive variable names and avoid abbreviations. As an example, activation is preferred to act. + One-letter variable names are strongly discouraged unless it's an index in a for loop. +More generally we prefer longer explicit code to short magical one. +Avoid subclassing nn.Sequential in PyTorch but subclass nn.Module and write the forward pass, so that anyone + using your code can quickly debug it by adding print statements or breaking points. +Your function signature should be type-annotated. For the rest, good variable names are way more readable and + understandable than type annotations. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..0065d2aea0303fcb6a5291bae1c155fe8cc4a2b6 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_6.txt @@ -0,0 +1,10 @@ +Overview of tokenizers +Not quite ready yet :-( This section will be added soon! +Step-by-step recipe to add a model to 🤗 Transformers +Everyone has different preferences of how to port a model so it can be very helpful for you to take a look at summaries +of how other contributors ported models to Hugging Face. 
Here is a list of community blog posts on how to port a model: + +Porting GPT2 Model by Thomas +Porting WMT19 MT Model by Stas + +From experience, we can tell you that the most important things to keep in mind when adding a model are: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_7.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_7.txt new file mode 100644 index 0000000000000000000000000000000000000000..1ce6aa0a48017d3ba7273e472428b85ac76baa2f --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_7.txt @@ -0,0 +1,11 @@ +Don't reinvent the wheel! Most parts of the code you will add for the new 🤗 Transformers model already exist + somewhere in 🤗 Transformers. Take some time to find similar, already existing models and tokenizers you can copy + from. grep and rg are your + friends. Note that it might very well happen that your model's tokenizer is based on one model implementation, and + your model's modeling code on another one. E.g. FSMT's modeling code is based on BART, while FSMT's tokenizer code + is based on XLM. +It's more of an engineering challenge than a scientific challenge. You should spend more time creating an + efficient debugging environment rather than trying to understand all theoretical aspects of the model in the paper. +Ask for help when you're stuck! Models are the core component of 🤗 Transformers so we at Hugging Face are more + than happy to help you at every step to add your model. Don't hesitate to ask if you notice you are not making + progress. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_8.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_8.txt new file mode 100644 index 0000000000000000000000000000000000000000..5e212dc19838c72351aad509acd57d394dcaccb8 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_8.txt @@ -0,0 +1,28 @@ +In the following, we try to give you a general recipe that we found most useful when porting a model to 🤗 Transformers. +The following list is a summary of everything that has to be done to add a model and can be used by you as a To-Do +List: +☐ (Optional) Understood the model's theoretical aspects +☐ Prepared 🤗 Transformers dev environment +☐ Set up debugging environment of the original repository +☐ Created script that successfully runs the forward() pass using the original repository and checkpoint +☐ Successfully added the model skeleton to 🤗 Transformers +☐ Successfully converted original checkpoint to 🤗 Transformers checkpoint +☐ Successfully ran forward() pass in 🤗 Transformers that gives identical output to original checkpoint +☐ Finished model tests in 🤗 Transformers +☐ Successfully added tokenizer in 🤗 Transformers +☐ Ran end-to-end integration tests +☐ Finished docs +☐ Uploaded model weights to the Hub +☐ Submitted the pull request +☐ (Optional) Added a demo notebook +To begin with, we usually recommend starting by getting a good theoretical understanding of BrandNewBert. However, +if you prefer to understand the theoretical aspects of the model on-the-job, then it is totally fine to dive directly +into BrandNewBert's code-base. This option might suit you better if your engineering skills are better than +your theoretical skills, if you have trouble understanding BrandNewBert's paper, or if you just enjoy programming +much more than reading scientific papers. +1.
(Optional) Theoretical aspects of BrandNewBert +You should take some time to read BrandNewBert's paper, if such descriptive work exists. There might be large +sections of the paper that are difficult to understand. If this is the case, this is fine - don't worry! The goal is +not to get a deep theoretical understanding of the paper, but to extract the necessary information required to +effectively re-implement the model in 🤗 Transformers. That being said, you don't have to spend too much time on the +theoretical aspects, but rather focus on the practical ones, namely: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_9.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_9.txt new file mode 100644 index 0000000000000000000000000000000000000000..c1b6f52649494931a3da25bdcfcd56a30d0f62f3 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_model.txt_chunk_9.txt @@ -0,0 +1,14 @@ +What type of model is brand_new_bert? BERT-like encoder-only model? GPT2-like decoder-only model? BART-like + encoder-decoder model? Look at the model_summary if you're not familiar with the differences between those. +What are the applications of brand_new_bert? Text classification? Text generation? Seq2Seq tasks, e.g., + summarization? +What is the novel feature of the model that makes it different from BERT/GPT-2/BART? +Which of the already existing 🤗 Transformers models is most + similar to brand_new_bert? +What type of tokenizer is used? A sentencepiece tokenizer? Word piece tokenizer? Is it the same tokenizer as used + for BERT or BART? + +After you feel like you have gotten a good overview of the architecture of the model, you might want to write to the +Hugging Face team with any questions you might have. This might include questions regarding the model's architecture, +its attention layer, etc. We will be more than happy to help you. +2. Next prepare your environment \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..40b0e609826a1e69acd42713f96100f485638087 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_0.txt @@ -0,0 +1,22 @@ +How to create a custom pipeline? +In this guide, we will see how to create a custom pipeline and share it on the Hub or add it to the +🤗 Transformers library. +First and foremost, you need to decide the raw entries the pipeline will be able to take. It can be strings, raw bytes, +dictionaries or whatever seems to be the most likely desired input. Try to keep these inputs as pure Python as possible +as it makes compatibility easier (even through other languages via JSON). Those will be the inputs of the +pipeline (preprocess). +Then define the outputs. Same policy as the inputs. The simpler, the better. Those will be the outputs of +postprocess method. +Start by inheriting the base class Pipeline with the 4 methods needed to implement preprocess, +_forward, postprocess, and _sanitize_parameters. 
+thon +from transformers import Pipeline +class MyPipeline(Pipeline): + def _sanitize_parameters(self, **kwargs): + preprocess_kwargs = {} + if "maybe_arg" in kwargs: + preprocess_kwargs["maybe_arg"] = kwargs["maybe_arg"] + return preprocess_kwargs, {}, {} +def preprocess(self, inputs, maybe_arg=2): + model_input = Tensor(inputs["input_ids"]) + return {"model_input": model_input} \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..9f54053e3be62402fe4d0fcbd25225a4ce153db1 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_1.txt @@ -0,0 +1,9 @@ +def _forward(self, model_inputs): + # model_inputs == {"model_input": model_input} + outputs = self.model(**model_inputs) + # Maybe {"logits": Tensor()} + return outputs + +def postprocess(self, model_outputs): + best_class = model_outputs["logits"].softmax(-1) + return best_class \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_10.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_10.txt new file mode 100644 index 0000000000000000000000000000000000000000..16faf10f85f4000a834d9c588d1001b2446302c8 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_10.txt @@ -0,0 +1,10 @@ +test_small_model_pt : Define 1 small model for this pipeline (doesn't matter if the results don't make sense) + and test the pipeline outputs. The results should be the same as test_small_model_tf. +test_small_model_tf : Define 1 small model for this pipeline (doesn't matter if the results don't make sense) + and test the pipeline outputs. The results should be the same as test_small_model_pt. +test_large_model_pt (optional): Tests the pipeline on a real pipeline where the results are supposed to + make sense. These tests are slow and should be marked as such. Here the goal is to showcase the pipeline and to make + sure there is no drift in future releases. +test_large_model_tf (optional): Tests the pipeline on a real pipeline where the results are supposed to + make sense. These tests are slow and should be marked as such. Here the goal is to showcase the pipeline and to make + sure there is no drift in future releases. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..1182fd437b4c36851d84e0591bb6969920ec5b29 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_2.txt @@ -0,0 +1,16 @@ +The structure of this breakdown is to support relatively seamless support for CPU/GPU, while supporting doing +pre/postprocessing on the CPU on different threads +preprocess will take the originally defined inputs, and turn them into something feedable to the model. It might +contain more information and is usually a Dict. +_forward is the implementation detail and is not meant to be called directly. forward is the preferred +called method as it contains safeguards to make sure everything is working on the expected device. If anything is +linked to a real model it belongs in the _forward method, anything else is in the preprocess/postprocess. 
+postprocess methods will take the output of _forward and turn it into the final output that was decided +earlier. +_sanitize_parameters exists to allow users to pass any parameters whenever they wish, be it at initialization +time pipeline(., maybe_arg=4) or at call time pipe = pipeline(); output = pipe(., maybe_arg=4). +The returns of _sanitize_parameters are the 3 dicts of kwargs that will be passed directly to preprocess, +_forward, and postprocess. Don't fill anything if the caller didn't call with any extra parameter. That +allows to keep the default arguments in the function definition which is always more "natural". +A classic example would be a top_k argument in the post processing in classification tasks. +thon \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..9b90c712d7422d55359fd2c719f889c9d61e480f --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_3.txt @@ -0,0 +1,22 @@ +pipe = pipeline("my-new-task") +pipe("This is a test") +[{"label": "1-star", "score": 0.8}, {"label": "2-star", "score": 0.1}, {"label": "3-star", "score": 0.05} +{"label": "4-star", "score": 0.025}, {"label": "5-star", "score": 0.025}] +pipe("This is a test", top_k=2) +[{"label": "1-star", "score": 0.8}, {"label": "2-star", "score": 0.1}] + +In order to achieve that, we'll update our postprocess method with a default parameter to 5. and edit +_sanitize_parameters to allow this new parameter. +thon +def postprocess(self, model_outputs, top_k=5): + best_class = model_outputs["logits"].softmax(-1) + # Add logic to handle top_k + return best_class +def _sanitize_parameters(self, **kwargs): + preprocess_kwargs = {} + if "maybe_arg" in kwargs: + preprocess_kwargs["maybe_arg"] = kwargs["maybe_arg"] +postprocess_kwargs = {} +if "top_k" in kwargs: + postprocess_kwargs["top_k"] = kwargs["top_k"] +return preprocess_kwargs, {}, postprocess_kwargs \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..e7fa7cae03e5706a3e6fc766898c95f5c6b44720 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_4.txt @@ -0,0 +1,12 @@ +Try to keep the inputs/outputs very simple and ideally JSON-serializable as it makes the pipeline usage very easy +without requiring users to understand new kinds of objects. 
It's also relatively common to support many different types +of arguments for ease of use (audio files, which can be filenames, URLs or pure bytes) +Adding it to the list of supported tasks +To register your new-task to the list of supported tasks, you have to add it to the PIPELINE_REGISTRY: +thon +from transformers.pipelines import PIPELINE_REGISTRY +PIPELINE_REGISTRY.register_pipeline( + "new-task", + pipeline_class=MyPipeline, + pt_model=AutoModelForSequenceClassification, +) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..4933b98f96054a82250b03e0320d5f9221875c9d --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_5.txt @@ -0,0 +1,12 @@ +You can specify a default model if you want, in which case it should come with a specific revision (which can be the name of a branch or a commit hash, here we took "abcdef") as well as the type: +python +PIPELINE_REGISTRY.register_pipeline( + "new-task", + pipeline_class=MyPipeline, + pt_model=AutoModelForSequenceClassification, + default={"pt": ("user/awesome_model", "abcdef")}, + type="text", # current support type: text, audio, image, multimodal +) +Share your pipeline on the Hub +To share your custom pipeline on the Hub, you just have to save the custom code of your Pipeline subclass in a +python file. For instance, let's say we want to use a custom pipeline for sentence pair classification like this: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..611086557394c22f11071e5fdb3b9dc544f4ff10 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_6.txt @@ -0,0 +1,21 @@ +import numpy as np +from transformers import Pipeline +def softmax(outputs): + maxes = np.max(outputs, axis=-1, keepdims=True) + shifted_exp = np.exp(outputs - maxes) + return shifted_exp / shifted_exp.sum(axis=-1, keepdims=True) +class PairClassificationPipeline(Pipeline): + def _sanitize_parameters(self, **kwargs): + preprocess_kwargs = {} + if "second_text" in kwargs: + preprocess_kwargs["second_text"] = kwargs["second_text"] + return preprocess_kwargs, {}, {} +def preprocess(self, text, second_text=None): + return self.tokenizer(text, text_pair=second_text, return_tensors=self.framework) + +def _forward(self, model_inputs): + return self.model(**model_inputs) + +def postprocess(self, model_outputs): + logits = model_outputs.logits[0].numpy() + probabilities = softmax(logits) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_7.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_7.txt new file mode 100644 index 0000000000000000000000000000000000000000..fa4b9e7c493891fab6884d4c02b2e1a802f09db8 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_7.txt @@ -0,0 +1,18 @@ +best_class = np.argmax(probabilities) + label = self.model.config.id2label[best_class] + score = probabilities[best_class].item() + logits = logits.tolist() + return {"label": label, "score": score, "logits": logits} + +The implementation is framework agnostic, and will work for PyTorch and TensorFlow models. 
If we have saved this in +a file named pair_classification.py, we can then import it and register it like this: + +from pair_classification import PairClassificationPipeline +from transformers.pipelines import PIPELINE_REGISTRY +from transformers import AutoModelForSequenceClassification, TFAutoModelForSequenceClassification +PIPELINE_REGISTRY.register_pipeline( + "pair-classification", + pipeline_class=PairClassificationPipeline, + pt_model=AutoModelForSequenceClassification, + tf_model=TFAutoModelForSequenceClassification, +) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_8.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_8.txt new file mode 100644 index 0000000000000000000000000000000000000000..bfdd72a30b70fcfd7d672ae7fe1562b128f436c4 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_8.txt @@ -0,0 +1,16 @@ +Once this is done, we can use it with a pretrained model. For instance sgugger/finetuned-bert-mrpc has been +fine-tuned on the MRPC dataset, which classifies pairs of sentences as paraphrases or not. + +from transformers import pipeline +classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc") + +Then we can share it on the Hub by using the push_to_hub method: +py +classifier.push_to_hub("test-dynamic-pipeline") +This will copy the file where you defined PairClassificationPipeline inside the folder "test-dynamic-pipeline", +along with saving the model and tokenizer of the pipeline, before pushing everything into the repository +{your_username}/test-dynamic-pipeline. After that, anyone can use it as long as they provide the option +trust_remote_code=True: + +from transformers import pipeline +classifier = pipeline(model="{your_username}/test-dynamic-pipeline", trust_remote_code=True) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_9.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_9.txt new file mode 100644 index 0000000000000000000000000000000000000000..246ec11c37958abd40727071e4b3eb28d8e6e184 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_add_new_pipeline.txt_chunk_9.txt @@ -0,0 +1,11 @@ +Add the pipeline to 🤗 Transformers +If you want to contribute your pipeline to 🤗 Transformers, you will need to add a new module in the pipelines submodule +with the code of your pipeline, then add it to the list of tasks defined in pipelines/__init__.py. +Then you will need to add tests. Create a new file tests/test_pipelines_MY_PIPELINE.py with examples of the other tests. +The run_pipeline_test function will be very generic and run on small random models on every possible +architecture as defined by model_mapping and tf_model_mapping. +This is very important to test future compatibility, meaning if someone adds a new model for +XXXForQuestionAnswering then the pipeline test will attempt to run on it. Because the models are random it's +impossible to check for actual values, that's why there is a helper ANY that will simply attempt to match the +output of the pipeline TYPE. +You also need to implement 2 (ideally 4) tests. 
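Sketching one of these tests under the assumption that the pair-classification pipeline from above has been registered; the tiny test checkpoint and the expected label are illustrative guesses, not verified values:
python
import unittest
from transformers import pipeline
from transformers.testing_utils import require_torch, slow

class PairClassificationPipelineTests(unittest.TestCase):
    @require_torch
    def test_small_model_pt(self):
        # A tiny random model is enough here; only the output structure is checked.
        classifier = pipeline("pair-classification", model="hf-internal-testing/tiny-random-bert")
        output = classifier("I like you", second_text="I love you")
        self.assertEqual(set(output.keys()), {"label", "score", "logits"})

    @slow
    @require_torch
    def test_large_model_pt(self):
        classifier = pipeline("pair-classification", model="sgugger/finetuned-bert-mrpc")
        output = classifier("I like you", second_text="I love you")
        self.assertEqual(output["label"], "equivalent")  # illustrative expected value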
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..1644ec24586d165086fb3e22fcc0b0b731f5fb0f --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_0.txt @@ -0,0 +1,18 @@ +Agents and tools +[[open-in-colab]] +What is an agent? +Large Language Models (LLMs) trained to perform causal language modeling can tackle a wide range of tasks, but they often struggle with basic tasks like logic, calculation, and search. When prompted in domains in which they do not perform well, they often fail to generate the answer we expect them to. +One approach to overcome this weakness is to create an agent. +An agent is a system that uses an LLM as its engine, and it has access to functions called tools. +These tools are functions for performing a task, and they contain all necessary description for the agent to properly use them. +The agent can be programmed to: +- devise a series of actions/tools and run them all at once like the [CodeAgent] for example +- plan and execute actions/tools one by one and wait for the outcome of each action before launching the next one like the [ReactJsonAgent] for example +Types of agents +Code agent +This agent has a planning step, then generates python code to execute all its actions at once. It natively handles different input and output types for its tools, thus it is the recommended choice for multimodal tasks. +React agents +This is the go-to agent to solve reasoning tasks, since the ReAct framework (Yao et al., 2022) makes it really efficient to think on the basis of its previous observations. +We implement two versions of ReactJsonAgent: +- [ReactJsonAgent] generates tool calls as a JSON in its output. +- [ReactCodeAgent] is a new type of ReactJsonAgent that generates its tool calls as blobs of code, which works really well for LLMs that have strong coding performance. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..cd8e89e7f6700e532d708cc3478cc2ad07bef300 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_1.txt @@ -0,0 +1,17 @@ +[!TIP] +Read Open-source LLMs as LangChain Agents blog post to learn more the ReAct agent. + +For example, here is how a ReAct agent would work its way through the following question. +3 + +agent.run( + "How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?", + ) +=====New task===== +How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need? 
+====Agent is executing the code below: +bert_blocks = search(query="number of blocks in BERT base encoder") +print("BERT blocks:", bert_blocks) +==== +Print outputs: +BERT blocks: twelve encoder blocks \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_10.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_10.txt new file mode 100644 index 0000000000000000000000000000000000000000..8149e717d759f1435d51106d6c3efe70facdc287 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_10.txt @@ -0,0 +1,11 @@ +The system prompt includes: +- An introduction that explains how the agent should behave and what tools are. +- A description of all the tools that is defined by a <> token that is dynamically replaced at runtime with the tools defined/chosen by the user. + - The tool description comes from the tool attributes, name, description, inputs and output_type, and a simple jinja2 template that you can refine. +- The expected output format. +You could improve the system prompt, for example, by adding an explanation of the output format. +For maximum flexibility, you can overwrite the whole system prompt template by passing your custom prompt as an argument to the system_prompt parameter. +thon +from transformers import ReactJsonAgent +from transformers.agents import PythonInterpreterTool +agent = ReactJsonAgent(tools=[PythonInterpreterTool()], system_prompt="{your_custom_prompt}") \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_11.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_11.txt new file mode 100644 index 0000000000000000000000000000000000000000..93f44193993a08b35dc269a70764e01f8a39525c --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_11.txt @@ -0,0 +1,10 @@ +[!WARNING] +Please make sure to define the <> string somewhere in the template so the agent is aware +of the available tools. + +Tools +A tool is an atomic function to be used by an agent. +You can for instance check the [PythonInterpreterTool]: it has a name, a description, input descriptions, an output type, and a __call__ method to perform the action. +When the agent is initialized, the tool attributes are used to generate a tool description which is baked into the agent's system prompt. This lets the agent know which tools it can use and why. +Default toolbox +Transformers comes with a default toolbox for empowering agents, that you can add to your agent upon initialization with argument add_base_tools = True: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_12.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_12.txt new file mode 100644 index 0000000000000000000000000000000000000000..e4b191ae9b6f8ae83b3c4bf89b4ab4f6a2f17fc3 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_12.txt @@ -0,0 +1,12 @@ +Document question answering: given a document (such as a PDF) in image format, answer a question on this document (Donut) +Image question answering: given an image, answer a question on this image (VILT) +Speech to text: given an audio recording of a person talking, transcribe the speech into text (Whisper) +Text to speech: convert text to speech (SpeechT5) +Translation: translates a given sentence from source language to target language. +Python code interpreter: runs your the LLM generated Python code in a secure environment. 
This tool will only be added to [ReactJsonAgent] if you use add_base_tools=True, since code-based tools can already execute Python code + +You can manually use a tool by calling the [load_tool] function with the task to perform. +python +from transformers import load_tool +tool = load_tool("text-to-speech") +audio = tool("This is a text to speech tool") \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_13.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_13.txt new file mode 100644 index 0000000000000000000000000000000000000000..14104fb8dcdc5571453a4ac6ee7cb92de1517b5d --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_13.txt @@ -0,0 +1,9 @@ +Create a new tool +You can create your own tool for use cases not covered by the default tools from Hugging Face. +For example, let's create a tool that returns the most downloaded model for a given task from the Hub. +You'll start with the code below. +python +from huggingface_hub import list_models +task = "text-classification" +model = next(iter(list_models(filter=task, sort="downloads", direction=-1))) +print(model.id) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_14.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_14.txt new file mode 100644 index 0000000000000000000000000000000000000000..ae2423dcb6eae50d3637860a174d22bc0658bc97 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_14.txt @@ -0,0 +1,23 @@ +This code can be converted into a class that inherits from the [Tool] superclass. +The custom tool needs: +- An attribute name, which corresponds to the name of the tool itself. The name usually describes what the tool does. Since the code returns the model with the most downloads for a task, let's name it model_download_counter. +- An attribute description, which is used to populate the agent's system prompt. +- An inputs attribute, which is a dictionary with keys "type" and "description". It contains information that helps the Python interpreter make educated choices about the input. +- An output_type attribute, which specifies the output type. +- A forward method which contains the inference code to be executed. +python +from transformers import Tool +from huggingface_hub import list_models +class HFModelDownloadsTool(Tool): + name = "model_download_counter" + description = ( + "This is a tool that returns the most downloaded model of a given task on the Hugging Face Hub. " + "It returns the name of the checkpoint." + ) + inputs = { + "task": { + "type": "text", + "description": "the task category (such as text-classification, depth-estimation, etc)", + } + } + output_type = "text" \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_15.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_15.txt new file mode 100644 index 0000000000000000000000000000000000000000..955d84186a290df0383a2c288a315a344b5c6f7c --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_15.txt @@ -0,0 +1,8 @@ + def forward(self, task: str): + model = next(iter(list_models(filter=task, sort="downloads", direction=-1))) + return model.id + +Now that the custom HFModelDownloadsTool class is ready, you can save it to a file named model_downloads.py and import it for use.
+thon +from model_downloads import HFModelDownloadsTool +tool = HFModelDownloadsTool() \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_16.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_16.txt new file mode 100644 index 0000000000000000000000000000000000000000..a8a21d85d23424ca6ff6de61c1bc20b251a2e3e8 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_16.txt @@ -0,0 +1,11 @@ +You can also share your custom tool to the Hub by calling [~Tool.push_to_hub] on the tool. Make sure you've created a repository for it on the Hub and are using a token with read access. +python +tool.push_to_hub("{your_username}/hf-model-downloads") +Load the tool with the [~Tool.load_tool] function and pass it to the tools parameter in your agent. +thon +from transformers import load_tool, CodeAgent +model_download_tool = load_tool("m-ric/hf-model-downloads") +agent = CodeAgent(tools=[model_download_tool], llm_engine=llm_engine) +agent.run( + "Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?" +) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_17.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_17.txt new file mode 100644 index 0000000000000000000000000000000000000000..a2cf0f3e3d68fb659cc30b3c798c134a66c10b0d --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_17.txt @@ -0,0 +1,17 @@ +You get the following: +text +======== New task ======== +Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub? +==== Agent is executing the code below: +most_downloaded_model = model_download_counter(task="text-to-video") +print(f"The most downloaded model for the 'text-to-video' task is {most_downloaded_model}.") +==== +And the output: +"The most downloaded model for the 'text-to-video' task is ByteDance/AnimateDiff-Lightning." +Manage agent toolbox +If you have already initialized an agent, it is inconvenient to reinitialize it from scratch with a tool you want to use. With Transformers, you can manage an agent's toolbox by adding or replacing a tool. +Let's add the model_download_tool to an existing agent initialized with only the default toolbox. +thon +from transformers import CodeAgent +agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True) +agent.toolbox.add_tool(model_download_tool) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_18.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_18.txt new file mode 100644 index 0000000000000000000000000000000000000000..ff3f81c865872b7438342e4fae407dc6f9b02c3d --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_18.txt @@ -0,0 +1,11 @@ +Now we can leverage both the new tool and the previous text-to-speech tool: +python +agent.run( + "Can you read out loud the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub and return the audio?" +) +| Audio | +|------------------------------------------------------------------------------------------------------------------------------------------------------| +| | + +[!WARNING] +Beware when adding tools to an agent that already works well because it can bias selection towards your tool or select another tool other than the one already defined. 
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_19.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_19.txt new file mode 100644 index 0000000000000000000000000000000000000000..e49bc31203444b0a6bf4f637d7b237b3561651ab --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_19.txt @@ -0,0 +1,11 @@ +Use the agent.toolbox.update_tool() method to replace an existing tool in the agent's toolbox. +This is useful if your new tool is a one-to-one replacement of the existing tool because the agent already knows how to perform that specific task. +Just make sure the new tool follows the same API as the replaced tool or adapt the system prompt template to ensure all examples using the replaced tool are updated. +Use a collection of tools +You can leverage tool collections by using the ToolCollection object, with the slug of the collection you want to use. +Then pass them as a list to initialize you agent, and start using them! + +from transformers import ToolCollection, ReactCodeAgent +image_tool_collection = ToolCollection(collection_slug="huggingface-tools/diffusion-tools-6630bb19a942c2306a2cdb6f") +agent = ReactCodeAgent(tools=[*image_tool_collection.tools], add_base_tools=True) +agent.run("Please draw me a picture of rivers and lakes.") \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..0f271e4b04a00806ed152002a4bc04ff41ddd95d --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_2.txt @@ -0,0 +1,19 @@ +====Agent is executing the code below: +attention_layer = search(query="number of layers in Attention is All You Need") +print("Attention layers:", attention_layer) +==== +Print outputs: +Attention layers: Encoder: The encoder is composed of a stack of N = 6 identical layers. Each layer has two sub-layers. The first is a multi-head self-attention mechanism, and the second is a simple, position- 2 Page 3 Figure 1: The Transformer - model architecture. +====Agent is executing the code below: +bert_blocks = 12 +attention_layers = 6 +diff = bert_blocks - attention_layers +print("Difference in blocks:", diff) +final_answer(diff) +==== +Print outputs: +Difference in blocks: 6 +Final answer: 6 + +How can I build an agent? +To initialize an agent, you need these arguments: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_20.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_20.txt new file mode 100644 index 0000000000000000000000000000000000000000..8ba64d33cef5e569a967722237812cb5b3ad5889 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_20.txt @@ -0,0 +1,13 @@ +To speed up the start, tools are loaded only if called by the agent. +This gets you this image: + +Use gradio-tools +gradio-tools is a powerful library that allows using Hugging +Face Spaces as tools. It supports many existing Spaces as well as custom Spaces. +Transformers supports gradio_tools with the [Tool.from_gradio] method. For example, let's use the StableDiffusionPromptGeneratorTool from gradio-tools toolkit for improving prompts to generate better images. 
+Import and instantiate the tool, then pass it to the Tool.from_gradio method: +thon +from gradio_tools import StableDiffusionPromptGeneratorTool +from transformers import Tool, load_tool, CodeAgent +gradio_prompt_generator_tool = StableDiffusionPromptGeneratorTool() +prompt_generator_tool = Tool.from_gradio(gradio_prompt_generator_tool) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_21.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_21.txt new file mode 100644 index 0000000000000000000000000000000000000000..386ece0affb794589dd755cd007dbb5fee4334ed --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_21.txt @@ -0,0 +1,21 @@ +Now you can use it just like any other tool. For example, let's improve the prompt a rabbit wearing a space suit. +thon +image_generation_tool = load_tool('huggingface-tools/text-to-image') +agent = CodeAgent(tools=[prompt_generator_tool, image_generation_tool], llm_engine=llm_engine) +agent.run( + "Improve this prompt, then generate an image of it.", prompt='A rabbit wearing a space suit' +) + +The model adequately leverages the tool: +text +======== New task ======== +Improve this prompt, then generate an image of it. +You have been provided with these initial arguments: {'prompt': 'A rabbit wearing a space suit'}. +==== Agent is executing the code below: +improved_prompt = StableDiffusionPromptGenerator(query=prompt) +while improved_prompt == "QUEUE_FULL": + improved_prompt = StableDiffusionPromptGenerator(query=prompt) +print(f"The improved prompt is {improved_prompt}.") +image = image_generator(prompt=improved_prompt) +==== +Before finally generating the image: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_22.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_22.txt new file mode 100644 index 0000000000000000000000000000000000000000..ec62d5c0911ef7cc900b7196aef0cac78307e0a8 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_22.txt @@ -0,0 +1,14 @@ +[!WARNING] +gradio-tools require textual inputs and outputs even when working with different modalities like image and audio objects. Image and audio inputs and outputs are currently incompatible. + +Use LangChain tools +We love Langchain and think it has a very compelling suite of tools. +To import a tool from LangChain, use the from_langchain() method. +Here is how you can use it to recreate the intro's search result using a LangChain web search tool. +thon +from langchain.agents import load_tools +from transformers import Tool, ReactCodeAgent +search_tool = Tool.from_langchain(load_tools(["serpapi"])[0]) +agent = ReactCodeAgent(tools=[search_tool]) +agent.run("How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?") +``` \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..ae1f8d8bab8ef8ab7cc6715ad2f8069805bed381 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_3.txt @@ -0,0 +1,10 @@ +How can I build an agent? +To initialize an agent, you need these arguments: + +an LLM to power your agent - the agent is not exactly the LLM, it’s more like the agent is a program that uses an LLM as its engine. 
+a system prompt: what the LLM engine will be prompted with to generate its output +a toolbox from which the agent pick tools to execute +a parser to extract from the LLM output which tools are to call and with which arguments + +Upon initialization of the agent system, the tool attributes are used to generate a tool description, then baked into the agent’s system_prompt to let it know which tools it can use and why. +To start with, please install the agents extras in order to install all default dependencies. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..16dfc240c0b39e7276461575119f0fb22a23297b --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_4.txt @@ -0,0 +1,10 @@ +pip install transformers[agents] +Build your LLM engine by defining a llm_engine method which accepts a list of messages and returns text. This callable also needs to accept a stop argument that indicates when to stop generating. +thon +from huggingface_hub import login, InferenceClient +login("") +client = InferenceClient(model="meta-llama/Meta-Llama-3-70B-Instruct") +def llm_engine(messages, stop_sequences=["Task"]) -> str: + response = client.chat_completion(messages, stop=stop_sequences, max_tokens=1000) + answer = response.choices[0].message.content + return answer \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..34f71a58b2b06a27909c825c267c04bb7a9653bd --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_5.txt @@ -0,0 +1,13 @@ +You could use any llm_engine method as long as: +1. it follows the messages format for its input (List[Dict[str, str]]) and returns a str +2. it stops generating outputs at the sequences passed in the argument stop +You also need a tools argument which accepts a list of Tools. You can provide an empty list for tools, but use the default toolbox with the optional argument add_base_tools=True. +Now you can create an agent, like [CodeAgent], and run it. For convenience, we also provide the [HfEngine] class that uses huggingface_hub.InferenceClient under the hood. +thon +from transformers import CodeAgent, HfEngine +llm_engine = HfEngine(model="meta-llama/Meta-Llama-3-70B-Instruct") +agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True) +agent.run( + "Could you translate this sentence from French, say it out loud and return the audio.", + sentence="Où est la boulangerie la plus proche?", +) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..02608446134cc0b45cb5da8275f24228e385421e --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_6.txt @@ -0,0 +1,16 @@ +This will be handy in case of emergency baguette need! +You can even leave the argument llm_engine undefined, and an [HfEngine] will be created by default. 
+thon +from transformers import CodeAgent +agent = CodeAgent(tools=[], add_base_tools=True) +agent.run( + "Could you translate this sentence from French, say it out loud and give me the audio.", + sentence="Où est la boulangerie la plus proche?", +) + +Note that we used an additional sentence argument: you can pass text as additional arguments to the model. +You can also use this to indicate the path to local or remote files for the model to use: + +from transformers import ReactCodeAgent +agent = ReactCodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True) +agent.run("Why does Mike not know many people in New York?", audio="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/recording.mp3") \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_7.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_7.txt new file mode 100644 index 0000000000000000000000000000000000000000..3cf339fd5d78bfb04cd9e6ebf235e22a6970ebc7 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_7.txt @@ -0,0 +1,11 @@ +The prompt and output parser were automatically defined, but you can easily inspect them by calling the system_prompt_template on your agent. +python +print(agent.system_prompt_template) +It's important to explain as clearly as possible the task you want to perform. +Every [~Agent.run] operation is independent, and since an agent is powered by an LLM, minor variations in your prompt might yield completely different results. +You can also run an agent consecutively for different tasks: each time the attributes agent.task and agent.logs will be re-initialized. +Code execution +A Python interpreter executes the code on a set of inputs passed along with your tools. +This should be safe because the only functions that can be called are the tools you provided (especially if it's only tools by Hugging Face) and the print function, so you're already limited in what can be executed. +The Python interpreter also doesn't allow imports by default outside of a safe list, so all the most obvious attacks shouldn't be an issue. +You can still authorize additional imports by passing the authorized modules as a list of strings in argument additional_authorized_imports upon initialization of your [ReactCodeAgent] or [CodeAgent]: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_8.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_8.txt new file mode 100644 index 0000000000000000000000000000000000000000..c6d34b2445064e0700a73eff91edb662cd7640b1 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_8.txt @@ -0,0 +1,11 @@ +from transformers import ReactCodeAgent +agent = ReactCodeAgent(tools=[], additional_authorized_imports=['requests', 'bs4']) +agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?") + +() +'Hugging Face – Blog' + +The execution will stop at any code trying to perform an illegal operation or if there is a regular Python error with the code generated by the agent. + +[!WARNING] +The LLM can generate arbitrary code that will then be executed: do not add any unsafe imports! 
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_9.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_9.txt new file mode 100644 index 0000000000000000000000000000000000000000..ea19a4593f7e01e840759e2fe66c6815bdd5b4f2 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_agents.txt_chunk_9.txt @@ -0,0 +1,22 @@ +The system prompt +An agent, or rather the LLM that drives the agent, generates an output based on the system prompt. The system prompt can be customized and tailored to the intended task. For example, check the system prompt for the [ReactCodeAgent] (below version is slightly simplified). +```text +You will be given a task to solve as best you can. +You have access to the following tools: +<> +To solve the task, you must plan forward to proceed in a series of steps, in a cycle of 'Thought:', 'Code:', and 'Observation:' sequences. +At each step, in the 'Thought:' sequence, you should first explain your reasoning towards solving the task, then the tools that you want to use. +Then in the 'Code:' sequence, you should write the code in simple Python. The code sequence must end with '/End code' sequence. +During each intermediate step, you can use 'print()' to save whatever important information you will then need. +These print outputs will then be available in the 'Observation:' field, for using this information as input for the next step. +In the end you have to return a final answer using the final_answer tool. +Here are a few examples using notional tools: +{examples} +The above examples were using notional tools that might not exist for you. You only have access to those tools: +<> +You can also perform computations in the python code you generate. +Always provide a 'Thought:' and a 'Code:\npy' sequence ending with '' sequence. You MUST provide at least the 'Code:' sequence to move forward. +Remember not to perform too many operations in a single code block! You should split the task into intermediate code blocks. +Print results at the end of each step to save the intermediate results. Then use final_answer() to return the final result. +Remember to make sure that variables you use are all defined. +Now Begin! \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_attention.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_attention.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..24331c2df8e2f141794cf3ed1a50c8a3110f0a12 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_attention.txt_chunk_0.txt @@ -0,0 +1,19 @@ +Attention mechanisms +Most transformer models use full attention in the sense that the attention matrix is square. It can be a big +computational bottleneck when you have long texts. Longformer and Reformer are models that try to be more efficient and +use a sparse version of the attention matrix to speed up training. +LSH attention +Reformer uses LSH attention. In the softmax(QK^t), only the biggest elements (in the softmax +dimension) of the matrix QK^t are going to give useful contributions. So for each query q in Q, we can consider only +the keys k in K that are close to q. A hash function is used to determine if q and k are close. The attention mask is +modified to mask the current token (except at the first position), because the query and the key at that position are equal (and therefore +very similar to each other). Since the hash can be a bit random, several hash functions are used in practice +(determined by an n_rounds parameter) and then are averaged together.
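+To make the bucketing idea concrete, here is a small, self-contained sketch of LSH attention with random-projection hashing. It only illustrates the idea and is not Reformer's actual implementation (the real model also sorts and chunks positions by bucket for efficiency and masks the query's own position):
+```python
+import torch
+
+def lsh_attention_sketch(qk, v, n_buckets=8, n_rounds=2):
+    # qk: shared query/key tensor of shape (seq_len, dim) -- Reformer ties Q and K.
+    # v:  value tensor of shape (seq_len, dim).
+    seq_len, dim = qk.shape
+    out = torch.zeros_like(v)
+    for _ in range(n_rounds):  # several hash rounds are averaged together
+        # Random-projection hash: nearby vectors tend to land in the same bucket.
+        proj = qk @ torch.randn(dim, n_buckets // 2)
+        buckets = torch.argmax(torch.cat([proj, -proj], dim=-1), dim=-1)
+        # Each position only attends to positions that hash to the same bucket.
+        same_bucket = buckets[:, None] == buckets[None, :]
+        scores = (qk @ qk.T) / dim**0.5
+        scores = scores.masked_fill(~same_bucket, float("-inf"))
+        out = out + torch.softmax(scores, dim=-1) @ v
+    return out / n_rounds
+
+qk, v = torch.randn(16, 64), torch.randn(16, 64)
+print(lsh_attention_sketch(qk, v).shape)  # torch.Size([16, 64])
+```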
+Local attention +Longformer uses local attention: often, the local context (e.g., what are the two tokens to the +left and right?) is enough to take action for a given token. Also, by stacking attention layers that have a small +window, the last layer will have a receptive field of more than just the tokens in the window, allowing them to build a +representation of the whole sentence. +Some preselected input tokens are also given global attention: for those few tokens, the attention matrix can access +all tokens and this process is symmetric: all other tokens have access to those specific tokens (on top of the ones in +their local window). This is shown in Figure 2d of the paper, see below for a sample attention mask: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_attention.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_attention.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..03744493d9f16ee0fdeff415c5b032f41f006161 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_attention.txt_chunk_1.txt @@ -0,0 +1,12 @@ +Using those attention matrices with less parameters then allows the model to have inputs having a bigger sequence +length. +Other tricks +Axial positional encodings +Reformer uses axial positional encodings: in traditional transformer models, the positional encoding +E is a matrix of size \(l\) by \(d\), \(l\) being the sequence length and \(d\) the dimension of the +hidden state. If you have very long texts, this matrix can be huge and take way too much space on the GPU. To alleviate +that, axial positional encodings consist of factorizing that big matrix E in two smaller matrices E1 and E2, with +dimensions \(l_{1} \times d_{1}\) and \(l_{2} \times d_{2}\), such that \(l_{1} \times l_{2} = l\) and +\(d_{1} + d_{2} = d\) (with the product for the lengths, this ends up being way smaller). The embedding for time +step \(j\) in E is obtained by concatenating the embeddings for timestep \(j \% l1\) in E1 and \(j // l1\) +in E2. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..7fffe343ea8f5b84747043443a431b38fda23105 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_0.txt @@ -0,0 +1,2 @@ +Load pretrained instances with an AutoClass +With so many different Transformer architectures, it can be challenging to create one for your checkpoint. As a part of 🤗 Transformers core philosophy to make the library easy, simple and flexible to use, an AutoClass automatically infers and loads the correct architecture from a given checkpoint. The from_pretrained() method lets you quickly load a pretrained model for any architecture so you don't have to devote time and resources to train a model from scratch. Producing this type of checkpoint-agnostic code means if your code works for one checkpoint, it will work with another checkpoint - as long as it was trained for a similar task - even if the architecture is different. 
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..c31422dbbdeaae8874ddc568de0451243cf20123 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_1.txt @@ -0,0 +1,19 @@ +Remember, architecture refers to the skeleton of the model and checkpoints are the weights for a given architecture. For example, BERT is an architecture, while google-bert/bert-base-uncased is a checkpoint. Model is a general term that can mean either architecture or checkpoint. + +In this tutorial, learn to: + +Load a pretrained tokenizer. +Load a pretrained image processor +Load a pretrained feature extractor. +Load a pretrained processor. +Load a pretrained model. +Load a model as a backbone. + +AutoTokenizer +Nearly every NLP task begins with a tokenizer. A tokenizer converts your input into a format that can be processed by the model. +Load a tokenizer with [AutoTokenizer.from_pretrained]: + +from transformers import AutoTokenizer +tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased") + +Then tokenize your input as shown below: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..e632f2f4fd83f8cbfaae2503e93b5f63af393f3b --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_2.txt @@ -0,0 +1,19 @@ +Then tokenize your input as shown below: + +sequence = "In a hole in the ground there lived a hobbit." +print(tokenizer(sequence)) +{'input_ids': [101, 1999, 1037, 4920, 1999, 1996, 2598, 2045, 2973, 1037, 7570, 10322, 4183, 1012, 102], + 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]} + +AutoImageProcessor +For vision tasks, an image processor processes the image into the correct input format. + +from transformers import AutoImageProcessor +image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224") + +AutoBackbone + +A Swin backbone with multiple stages for outputting a feature map. + +The [AutoBackbone] lets you use pretrained models as backbones to get feature maps from different stages of the backbone. You should specify one of the following parameters in [~PretrainedConfig.from_pretrained]: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..1313329e4064b9b353ac4394e0dc5288a112b955 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_3.txt @@ -0,0 +1,8 @@ +out_indices is the index of the layer you'd like to get the feature map from +out_features is the name of the layer you'd like to get the feature map from + +These parameters can be used interchangeably, but if you use both, make sure they're aligned with each other! If you don't pass any of these parameters, the backbone returns the feature map from the last layer. + +A feature map from the first stage of the backbone. The patch partition refers to the model stem. 
+ +For example, in the above diagram, to return the feature map from the first stage of the Swin backbone, you can set out_indices=(1,): \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..fb367919a5daaa574bf68b8a1e36720e353b9b02 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_4.txt @@ -0,0 +1,20 @@ +from transformers import AutoImageProcessor, AutoBackbone +import torch +from PIL import Image +import requests +url = "http://images.cocodataset.org/val2017/000000039769.jpg" +image = Image.open(requests.get(url, stream=True).raw) +processor = AutoImageProcessor.from_pretrained("microsoft/swin-tiny-patch4-window7-224") +model = AutoBackbone.from_pretrained("microsoft/swin-tiny-patch4-window7-224", out_indices=(1,)) +inputs = processor(image, return_tensors="pt") +outputs = model(**inputs) +feature_maps = outputs.feature_maps + +Now you can access the feature_maps object from the first stage of the backbone: + +list(feature_maps[0].shape) +[1, 96, 56, 56] + +AutoFeatureExtractor +For audio tasks, a feature extractor processes the audio signal the correct input format. +Load a feature extractor with [AutoFeatureExtractor.from_pretrained]: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..542b87579a934515b05f9b5ee237d1735c105054 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_5.txt @@ -0,0 +1,18 @@ +from transformers import AutoFeatureExtractor +feature_extractor = AutoFeatureExtractor.from_pretrained( + "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition" + ) + +AutoProcessor +Multimodal tasks require a processor that combines two types of preprocessing tools. For example, the LayoutLMV2 model requires an image processor to handle images and a tokenizer to handle text; a processor combines both of them. +Load a processor with [AutoProcessor.from_pretrained]: + +from transformers import AutoProcessor +processor = AutoProcessor.from_pretrained("microsoft/layoutlmv2-base-uncased") + +AutoModel + +The AutoModelFor classes let you load a pretrained model for a given task (see here for a complete list of available tasks). 
For example, load a model for sequence classification with [AutoModelForSequenceClassification.from_pretrained]: + +from transformers import AutoModelForSequenceClassification +model = AutoModelForSequenceClassification.from_pretrained("distilbert/distilbert-base-uncased") \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..e5c8f18ac97a9d0daa1cced11667b6de718565cc --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_6.txt @@ -0,0 +1,7 @@ +Easily reuse the same checkpoint to load an architecture for a different task: + +from transformers import AutoModelForTokenClassification +model = AutoModelForTokenClassification.from_pretrained("distilbert/distilbert-base-uncased") + +For PyTorch models, the from_pretrained() method uses torch.load() which internally uses pickle and is known to be insecure. In general, never load a model that could have come from an untrusted source, or that could have been tampered with. This security risk is partially mitigated for public models hosted on the Hugging Face Hub, which are scanned for malware at each commit. See the Hub documentation for best practices like signed commit verification with GPG. +TensorFlow and Flax checkpoints are not affected, and can be loaded within PyTorch architectures using the from_tf and from_flax kwargs for the from_pretrained method to circumvent this issue. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_7.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_7.txt new file mode 100644 index 0000000000000000000000000000000000000000..baf8e33470a6592f2a22e0cae5a81462e68073db --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_7.txt @@ -0,0 +1,11 @@ +Generally, we recommend using the AutoTokenizer class and the AutoModelFor class to load pretrained instances of models. This will ensure you load the correct architecture every time. In the next tutorial, learn how to use your newly loaded tokenizer, image processor, feature extractor and processor to preprocess a dataset for fine-tuning. + +Finally, the TFAutoModelFor classes let you load a pretrained model for a given task (see here for a complete list of available tasks). For example, load a model for sequence classification with [TFAutoModelForSequenceClassification.from_pretrained]: + +from transformers import TFAutoModelForSequenceClassification +model = TFAutoModelForSequenceClassification.from_pretrained("distilbert/distilbert-base-uncased") + +Easily reuse the same checkpoint to load an architecture for a different task: + +from transformers import TFAutoModelForTokenClassification +model = TFAutoModelForTokenClassification.from_pretrained("distilbert/distilbert-base-uncased") \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_8.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_8.txt new file mode 100644 index 0000000000000000000000000000000000000000..22567d54cf0e10c9b5e17c6510efa451df141a52 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_autoclass_tutorial.txt_chunk_8.txt @@ -0,0 +1 @@ +Generally, we recommend using the AutoTokenizer class and the TFAutoModelFor class to load pretrained instances of models. 
This will ensure you load the correct architecture every time. In the next tutorial, learn how to use your newly loaded tokenizer, image processor, feature extractor and processor to preprocess a dataset for fine-tuning. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..bff94fec35825f4ab172b0be879990cd9b7a35f3 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_0.txt @@ -0,0 +1,13 @@ +Benchmarks + +Hugging Face's Benchmarking tools are deprecated and it is advised to use external Benchmarking libraries to measure the speed +and memory complexity of Transformer models. + +[[open-in-colab]] +Let's take a look at how 🤗 Transformers models can be benchmarked, best practices, and already available benchmarks. +A notebook explaining in more detail how to benchmark 🤗 Transformers models can be found here. +How to benchmark 🤗 Transformers models +The classes [PyTorchBenchmark] and [TensorFlowBenchmark] allow to flexibly benchmark 🤗 Transformers models. The benchmark classes allow us to measure the peak memory usage and required time for both inference and training. + +Hereby, inference is defined by a single forward pass, and training is defined by a single forward pass and +backward pass. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..0f24075ca6a0b1a78d75ceefa9cb3e8cd8823e0a --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_1.txt @@ -0,0 +1,13 @@ +The benchmark classes [PyTorchBenchmark] and [TensorFlowBenchmark] expect an object of type [PyTorchBenchmarkArguments] and +[TensorFlowBenchmarkArguments], respectively, for instantiation. [PyTorchBenchmarkArguments] and [TensorFlowBenchmarkArguments] are data classes and contain all relevant configurations for their corresponding benchmark class. In the following example, it is shown how a BERT model of type bert-base-cased can be benchmarked. 
+ +from transformers import PyTorchBenchmark, PyTorchBenchmarkArguments +args = PyTorchBenchmarkArguments(models=["google-bert/bert-base-uncased"], batch_sizes=[8], sequence_lengths=[8, 32, 128, 512]) +benchmark = PyTorchBenchmark(args) + +py +from transformers import TensorFlowBenchmark, TensorFlowBenchmarkArguments +args = TensorFlowBenchmarkArguments( + models=["google-bert/bert-base-uncased"], batch_sizes=[8], sequence_lengths=[8, 32, 128, 512] + ) +benchmark = TensorFlowBenchmark(args) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_10.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_10.txt new file mode 100644 index 0000000000000000000000000000000000000000..62aed9be568f13abdeb9ae2223253152a77a35a9 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_10.txt @@ -0,0 +1,16 @@ +==================== INFERENCE - MEMORY - RESULT ==================== +Model Name Batch Size Seq Length Memory in MB +bert-base 8 8 1277 +bert-base 8 32 1281 +bert-base 8 128 1307 +bert-base 8 512 1539 +bert-384-hid 8 8 1005 +bert-384-hid 8 32 1027 +bert-384-hid 8 128 1035 +bert-384-hid 8 512 1255 +bert-6-lay 8 8 1097 +bert-6-lay 8 32 1101 +bert-6-lay 8 128 1127 +bert-6-lay 8 512 1359 + +==================== ENVIRONMENT INFORMATION ==================== \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_11.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_11.txt new file mode 100644 index 0000000000000000000000000000000000000000..037200d3e66c37159fc7ca3904d8c2aa76697445 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_11.txt @@ -0,0 +1,27 @@ +==================== ENVIRONMENT INFORMATION ==================== + +transformers_version: 2.11.0 +framework: PyTorch +use_torchscript: False +framework_version: 1.4.0 +python_version: 3.6.10 +system: Linux +cpu: x86_64 +architecture: 64bit +date: 2020-06-29 +time: 09:35:25.143267 +fp16: False +use_multiprocessing: True +only_pretrain_model: False +cpu_ram_mb: 32088 +use_gpu: True +num_gpus: 1 +gpu: TITAN RTX +gpu_ram_mb: 24217 +gpu_power_watts: 280.0 +gpu_performance_state: 2 +use_tpu: False + +py + +from transformers import TensorFlowBenchmark, TensorFlowBenchmarkArguments, BertConfig \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_12.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_12.txt new file mode 100644 index 0000000000000000000000000000000000000000..8756855598e93bd0b257b52f19ceb5132c6657ec --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_12.txt @@ -0,0 +1,11 @@ +from transformers import TensorFlowBenchmark, TensorFlowBenchmarkArguments, BertConfig + +args = TensorFlowBenchmarkArguments( + models=["bert-base", "bert-384-hid", "bert-6-lay"], batch_sizes=[8], sequence_lengths=[8, 32, 128, 512] + ) +config_base = BertConfig() +config_384_hid = BertConfig(hidden_size=384) +config_6_lay = BertConfig(num_hidden_layers=6) +benchmark = TensorFlowBenchmark(args, configs=[config_base, config_384_hid, config_6_lay]) +benchmark.run() +==================== INFERENCE - SPEED - RESULT ==================== \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_13.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_13.txt new file mode 100644 index 0000000000000000000000000000000000000000..163c170d5727371790a6d6a1e7e5360ff07f9bd7 --- /dev/null +++ 
b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_13.txt @@ -0,0 +1,13 @@ +Model Name Batch Size Seq Length Time in s +bert-base 8 8 0.005 +bert-base 8 32 0.008 +bert-base 8 128 0.022 +bert-base 8 512 0.106 +bert-384-hid 8 8 0.005 +bert-384-hid 8 32 0.007 +bert-384-hid 8 128 0.018 +bert-384-hid 8 512 0.064 +bert-6-lay 8 8 0.002 +bert-6-lay 8 32 0.003 +bert-6-lay 8 128 0.0011 +bert-6-lay 8 512 0.074 \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_14.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_14.txt new file mode 100644 index 0000000000000000000000000000000000000000..c7bcee951b813411d449432cd1c927b0605a6ddf --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_14.txt @@ -0,0 +1,16 @@ +==================== INFERENCE - MEMORY - RESULT ==================== +Model Name Batch Size Seq Length Memory in MB +bert-base 8 8 1330 +bert-base 8 32 1330 +bert-base 8 128 1330 +bert-base 8 512 1770 +bert-384-hid 8 8 1330 +bert-384-hid 8 32 1330 +bert-384-hid 8 128 1330 +bert-384-hid 8 512 1540 +bert-6-lay 8 8 1330 +bert-6-lay 8 32 1330 +bert-6-lay 8 128 1330 +bert-6-lay 8 512 1540 + +==================== ENVIRONMENT INFORMATION ==================== \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_15.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_15.txt new file mode 100644 index 0000000000000000000000000000000000000000..f8e2b5347406d40a982ce81a563d11dfe9888064 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_15.txt @@ -0,0 +1,29 @@ +==================== ENVIRONMENT INFORMATION ==================== + +transformers_version: 2.11.0 +framework: Tensorflow +use_xla: False +framework_version: 2.2.0 +python_version: 3.6.10 +system: Linux +cpu: x86_64 +architecture: 64bit +date: 2020-06-29 +time: 09:38:15.487125 +fp16: False +use_multiprocessing: True +only_pretrain_model: False +cpu_ram_mb: 32088 +use_gpu: True +num_gpus: 1 +gpu: TITAN RTX +gpu_ram_mb: 24217 +gpu_power_watts: 280.0 +gpu_performance_state: 2 +use_tpu: False + +Again, inference time and required memory for inference are measured, but this time for customized configurations +of the BertModel class. This feature can especially be helpful when deciding for which configuration the model +should be trained. +Benchmark best practices +This section lists a couple of best practices one should be aware of when benchmarking a model. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_16.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_16.txt new file mode 100644 index 0000000000000000000000000000000000000000..84b0997e6a223111d50d87d0afc2bea136308163 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_16.txt @@ -0,0 +1,9 @@ +Currently, only single device benchmarking is supported. When benchmarking on GPU, it is recommended that the user + specifies on which device the code should be run by setting the CUDA_VISIBLE_DEVICES environment variable in the + shell, e.g. export CUDA_VISIBLE_DEVICES=0 before running the code. +The option no_multi_processing should only be set to True for testing and debugging. To ensure accurate + memory measurement it is recommended to run each memory benchmark in a separate process by making sure + no_multi_processing is set to True. +One should always state the environment information when sharing the results of a model benchmark. 
Results can vary + heavily between different GPU devices, library versions, etc., so that benchmark results on their own are not very + useful for the community. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_17.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_17.txt new file mode 100644 index 0000000000000000000000000000000000000000..98d65f2b52e71813687a2b71ba72cd177c3d3379 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_17.txt @@ -0,0 +1,10 @@ +Sharing your benchmark +Previously, all available core models (10 at the time) were benchmarked for inference time, across many different +settings: using PyTorch, with and without TorchScript, using TensorFlow, with and without XLA. All of those tests were +done across CPUs (except for TensorFlow XLA) and GPUs. +The approach is detailed in the following blogpost and the results are +available here. +With the new benchmark tools, it is easier than ever to share your benchmark results with the community: + +PyTorch Benchmarking Results. +TensorFlow Benchmarking Results. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..83c2df721237c9632c4112cc349583efbe61bf62 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_2.txt @@ -0,0 +1,12 @@ +Here, three arguments are given to the benchmark argument data classes, namely models, batch_sizes, and +sequence_lengths. The argument models is required and expects a list of model identifiers from the +model hub. The list arguments batch_sizes and sequence_lengths define +the size of the input_ids on which the model is benchmarked. There are many more parameters that can be configured +via the benchmark argument data classes. For more detail on these, one can directly consult the files +src/transformers/benchmark/benchmark_args_utils.py, src/transformers/benchmark/benchmark_args.py (for PyTorch) +and src/transformers/benchmark/benchmark_args_tf.py (for TensorFlow). Alternatively, running the following shell +commands from root will print out a descriptive list of all configurable parameters for PyTorch and TensorFlow, +respectively. + +python examples/pytorch/benchmarking/run_benchmark.py --help +An instantiated benchmark object can then simply be run by calling benchmark.run().
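+As an illustration, a few of these extra options can be set directly on the argument data class. The parameter names shown below (fp16, training, save_to_csv) are assumptions from memory of the benchmark argument classes, so verify them against the --help output above for your installed version:
+```python
+from transformers import PyTorchBenchmark, PyTorchBenchmarkArguments
+
+# Sketch only: enable a few optional settings besides models/batch_sizes/sequence_lengths.
+args = PyTorchBenchmarkArguments(
+    models=["google-bert/bert-base-uncased"],
+    batch_sizes=[8],
+    sequence_lengths=[8, 32, 128, 512],
+    fp16=True,          # benchmark in half precision
+    training=True,      # also measure a forward + backward pass
+    save_to_csv=True,   # write each result section to its own .csv file
+)
+benchmark = PyTorchBenchmark(args)
+results = benchmark.run()
+```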
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..b0dba1676f84ba94afa4e37d33e585c977b67a8d --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_3.txt @@ -0,0 +1,16 @@ +results = benchmark.run() +print(results) +==================== INFERENCE - SPEED - RESULT ==================== + +Model Name Batch Size Seq Length Time in s +google-bert/bert-base-uncased 8 8 0.006 +google-bert/bert-base-uncased 8 32 0.006 +google-bert/bert-base-uncased 8 128 0.018 +google-bert/bert-base-uncased 8 512 0.088 + +==================== INFERENCE - MEMORY - RESULT ==================== +Model Name Batch Size Seq Length Memory in MB +google-bert/bert-base-uncased 8 8 1227 +google-bert/bert-base-uncased 8 32 1281 +google-bert/bert-base-uncased 8 128 1307 +google-bert/bert-base-uncased 8 512 1539 \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..414e32ad5254a53d5f2345648a8fb90de20ce933 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_4.txt @@ -0,0 +1,34 @@ +==================== ENVIRONMENT INFORMATION ==================== + +transformers_version: 2.11.0 +framework: PyTorch +use_torchscript: False +framework_version: 1.4.0 +python_version: 3.6.10 +system: Linux +cpu: x86_64 +architecture: 64bit +date: 2020-06-29 +time: 08:58:43.371351 +fp16: False +use_multiprocessing: True +only_pretrain_model: False +cpu_ram_mb: 32088 +use_gpu: True +num_gpus: 1 +gpu: TITAN RTX +gpu_ram_mb: 24217 +gpu_power_watts: 280.0 +gpu_performance_state: 2 +use_tpu: False + +bash +python examples/tensorflow/benchmarking/run_benchmark_tf.py --help + +An instantiated benchmark object can then simply be run by calling benchmark.run(). 
+ +results = benchmark.run() +print(results) +results = benchmark.run() +print(results) +==================== INFERENCE - SPEED - RESULT ==================== \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..fcd9556056e100e94de1d0e1eb35beb890139006 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_5.txt @@ -0,0 +1,14 @@ +Model Name Batch Size Seq Length Time in s +google-bert/bert-base-uncased 8 8 0.005 +google-bert/bert-base-uncased 8 32 0.008 +google-bert/bert-base-uncased 8 128 0.022 +google-bert/bert-base-uncased 8 512 0.105 + +==================== INFERENCE - MEMORY - RESULT ==================== +Model Name Batch Size Seq Length Memory in MB +google-bert/bert-base-uncased 8 8 1330 +google-bert/bert-base-uncased 8 32 1330 +google-bert/bert-base-uncased 8 128 1330 +google-bert/bert-base-uncased 8 512 1770 + +==================== ENVIRONMENT INFORMATION ==================== \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..010a95be46a6b17650415bd5a997bad23f768575 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_6.txt @@ -0,0 +1,23 @@ +==================== ENVIRONMENT INFORMATION ==================== + +transformers_version: 2.11.0 +framework: Tensorflow +use_xla: False +framework_version: 2.2.0 +python_version: 3.6.10 +system: Linux +cpu: x86_64 +architecture: 64bit +date: 2020-06-29 +time: 09:26:35.617317 +fp16: False +use_multiprocessing: True +only_pretrain_model: False +cpu_ram_mb: 32088 +use_gpu: True +num_gpus: 1 +gpu: TITAN RTX +gpu_ram_mb: 24217 +gpu_power_watts: 280.0 +gpu_performance_state: 2 +use_tpu: False \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_7.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_7.txt new file mode 100644 index 0000000000000000000000000000000000000000..c0934da8a4f06b8761413b5f964eb7bbd35b842d --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_7.txt @@ -0,0 +1,10 @@ +By default, the time and the required memory for inference are benchmarked. In the example output above the first +two sections show the result corresponding to inference time and inference memory. In addition, all relevant +information about the computing environment, e.g. the GPU type, the system, the library versions, etc are printed +out in the third section under ENVIRONMENT INFORMATION. This information can optionally be saved in a .csv file +when adding the argument save_to_csv=True to [PyTorchBenchmarkArguments] and +[TensorFlowBenchmarkArguments] respectively. In this case, every section is saved in a separate +.csv file. The path to each .csv file can optionally be defined via the argument data classes. +Instead of benchmarking pre-trained models via their model identifier, e.g. google-bert/bert-base-uncased, the user can +alternatively benchmark an arbitrary configuration of any available model class. In this case, a list of +configurations must be inserted with the benchmark args as follows. 
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_8.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_8.txt new file mode 100644 index 0000000000000000000000000000000000000000..26c402b9747003442c433de2a74d132e76b5274a --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_8.txt @@ -0,0 +1,10 @@ +from transformers import PyTorchBenchmark, PyTorchBenchmarkArguments, BertConfig +args = PyTorchBenchmarkArguments( + models=["bert-base", "bert-384-hid", "bert-6-lay"], batch_sizes=[8], sequence_lengths=[8, 32, 128, 512] + ) +config_base = BertConfig() +config_384_hid = BertConfig(hidden_size=384) +config_6_lay = BertConfig(num_hidden_layers=6) +benchmark = PyTorchBenchmark(args, configs=[config_base, config_384_hid, config_6_lay]) +benchmark.run() +==================== INFERENCE - SPEED - RESULT ==================== \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_9.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_9.txt new file mode 100644 index 0000000000000000000000000000000000000000..7189d04e63bc24a578aaa86177fc2f71012d21bf --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_benchmarks.txt_chunk_9.txt @@ -0,0 +1,13 @@ +Model Name Batch Size Seq Length Time in s +bert-base 8 8 0.006 +bert-base 8 32 0.006 +bert-base 8 128 0.018 +bert-base 8 512 0.088 +bert-384-hid 8 8 0.006 +bert-384-hid 8 32 0.006 +bert-384-hid 8 128 0.011 +bert-384-hid 8 512 0.054 +bert-6-lay 8 8 0.003 +bert-6-lay 8 32 0.004 +bert-6-lay 8 128 0.009 +bert-6-lay 8 512 0.044 \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_bertology.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_bertology.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..d2b1b45037293150c72a01f8f3b25c43e3191361 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_bertology.txt_chunk_0.txt @@ -0,0 +1,10 @@ +BERTology +There is a growing field of study concerned with investigating the inner workings of large-scale transformers like BERT +(that some call "BERTology"). Some good examples of this field are: + +BERT Rediscovers the Classical NLP Pipeline by Ian Tenney, Dipanjan Das, Ellie Pavlick: + https://arxiv.org/abs/1905.05950 +Are Sixteen Heads Really Better than One? by Paul Michel, Omer Levy, Graham Neubig: https://arxiv.org/abs/1905.10650 +What Does BERT Look At? An Analysis of BERT's Attention by Kevin Clark, Urvashi Khandelwal, Omer Levy, Christopher D.
+ Manning: https://arxiv.org/abs/1906.04341 +CAT-probing: A Metric-based Approach to Interpret How Pre-trained Models for Programming Language Attend Code Structure: https://arxiv.org/abs/2210.04633 \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_bertology.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_bertology.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..792f21457d22dcc39003dd6d40827944f7df90c5 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_bertology.txt_chunk_1.txt @@ -0,0 +1,11 @@ +In order to help this new field develop, we have included a few additional features in the BERT/GPT/GPT-2 models to +help people access the inner representations, mainly adapted from the great work of Paul Michel +(https://arxiv.org/abs/1905.10650): + +accessing all the hidden-states of BERT/GPT/GPT-2, +accessing all the attention weights for each head of BERT/GPT/GPT-2, +retrieving head output values and gradients to be able to compute head importance scores and prune heads as explained + in https://arxiv.org/abs/1905.10650. + +To help you understand and use these features, we have added a specific example script: bertology.py, which extracts information from and prunes a model pre-trained on +GLUE. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..7108f2e4f01880e68b02cb4241f197642439433f --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_0.txt @@ -0,0 +1,11 @@ +Instantiate a big model +A barrier to accessing very large pretrained models is the amount of memory required. When loading a pretrained PyTorch model, you usually: + +Create a model with random weights. +Load your pretrained weights. +Put those pretrained weights in the model. + +The first two steps both require a full version of the model in memory, and if the model weighs several GBs, you may not have enough memory for two copies of it. This problem is amplified in distributed training environments because each process loads a pretrained model and stores two copies in memory. + +[!TIP] +The randomly created model is initialized with "empty" tensors, which take space in memory without filling it. The random values are whatever was in this chunk of memory at the time. To improve loading speed, the _fast_init parameter is set to True by default to skip the random initialization for all weights that are correctly loaded. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..a8b3249a2955ee80b0f7e37234ffe634dcdbd142 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_1.txt @@ -0,0 +1,5 @@ +This guide will show you how Transformers can help you load large pretrained models despite their memory requirements. +Sharded checkpoints +From Transformers v4.18.0, a checkpoint larger than 10GB is automatically sharded by the [~PreTrainedModel.save_pretrained] method. It is split into several smaller partial checkpoints and creates an index file that maps parameter names to the files they're stored in.
+The maximum shard size is controlled with the max_shard_size parameter, but by default it is 5GB, because it is easier to run on free-tier GPU instances without running out of memory. +For example, let's shard BioMistral/BioMistral-7B. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..dc9dc4470097c73b12d6ce31ea8ef8df2fc81c01 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_2.txt @@ -0,0 +1,10 @@ +with tempfile.TemporaryDirectory() as tmp_dir: + model.save_pretrained(tmp_dir, max_shard_size="5GB") + print(sorted(os.listdir(tmp_dir))) +['config.json', 'generation_config.json', 'model-00001-of-00006.safetensors', 'model-00002-of-00006.safetensors', 'model-00003-of-00006.safetensors', 'model-00004-of-00006.safetensors', 'model-00005-of-00006.safetensors', 'model-00006-of-00006.safetensors', 'model.safetensors.index.json'] + +The sharded checkpoint is reloaded with the [~PreTrainedModel.from_pretrained] method. + +with tempfile.TemporaryDirectory() as tmp_dir: + model.save_pretrained(tmp_dir, max_shard_size="5GB") + new_model = AutoModel.from_pretrained(tmp_dir) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..c4d575b983f78db5a40b57f0d2adc7073cd94484 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_3.txt @@ -0,0 +1,10 @@ +The main advantage of sharded checkpoints for big models is that each shard is loaded after the previous one, which caps the memory usage to only the model size and the largest shard size. +You could also directly load a sharded checkpoint inside a model without the [~PreTrainedModel.from_pretrained] method (similar to PyTorch's load_state_dict() method for a full checkpoint). In this case, use the [~modeling_utils.load_sharded_checkpoint] method. + +from transformers.modeling_utils import load_sharded_checkpoint +with tempfile.TemporaryDirectory() as tmp_dir: + model.save_pretrained(tmp_dir, max_shard_size="5GB") + load_sharded_checkpoint(model, tmp_dir) + +Shard metadata +The index file determines which keys are in the checkpoint and where the corresponding weights are stored. This file is loaded like any other JSON file and you can get a dictionary from it. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..3026108370ab821f5a027280fa86d6f190d03352 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_4.txt @@ -0,0 +1,27 @@ +import json +with tempfile.TemporaryDirectory() as tmp_dir: + model.save_pretrained(tmp_dir, max_shard_size="5GB") + with open(os.path.join(tmp_dir, "model.safetensors.index.json"), "r") as f: + index = json.load(f) +print(index.keys()) +dict_keys(['metadata', 'weight_map']) + +The metadata key provides the total model size. + +index["metadata"] +{'total_size': 28966928384} + +The weight_map key maps each parameter name (typically state_dict in a PyTorch model) to the shard it's stored in. 
+ +index["weight_map"] +{'lm_head.weight': 'model-00006-of-00006.safetensors', + 'model.embed_tokens.weight': 'model-00001-of-00006.safetensors', + 'model.layers.0.input_layernorm.weight': 'model-00001-of-00006.safetensors', + 'model.layers.0.mlp.down_proj.weight': 'model-00001-of-00006.safetensors', + +} + +Accelerate's Big Model Inference + +[!TIP] +Make sure you have Accelerate v0.9.0 or later and PyTorch v1.9.0 or later installed. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..5fa7aac0f7248af01c555ac0ce695a779fa6e1a2 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_5.txt @@ -0,0 +1,8 @@ +[!TIP] +Make sure you have Accelerate v0.9.0 or later and PyTorch v1.9.0 or later installed. + +From Transformers v4.20.0, the [~PreTrainedModel.from_pretrained] method is supercharged with Accelerate's Big Model Inference feature to efficiently handle really big models! Big Model Inference creates a model skeleton on PyTorch's meta device. The randomly initialized parameters are only created when the pretrained weights are loaded. This way, you aren't keeping two copies of the model in memory at the same time (one for the randomly initialized model and one for the pretrained weights), and the maximum memory consumed is only the full model size. +To enable Big Model Inference in Transformers, set low_cpu_mem_usage=True in the [~PreTrainedModel.from_pretrained] method. + +from transformers import AutoModelForCausalLM +gemma = AutoModelForCausalLM.from_pretrained("google/gemma-7b", low_cpu_mem_usage=True) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..c91d30a6ead90ed52dfe1211c05dad00bb6649c2 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_6.txt @@ -0,0 +1,6 @@ +Accelerate automatically dispatches the model weights across all available devices, starting with the fastest device (GPU) first and then offloading to the slower devices (CPU and even hard drive). This is enabled by setting device_map="auto" in the [~PreTrainedModel.from_pretrained] method. When you pass the device_map parameter, low_cpu_mem_usage is automatically set to True so you don't need to specify it. + +from transformers import AutoModelForCausalLM +these loading methods are equivalent +gemma = AutoModelForCausalLM.from_pretrained("google/gemma-7b", device_map="auto") +gemma = AutoModelForCausalLM.from_pretrained("google/gemma-7b", device_map="auto", low_cpu_mem_usage=True) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_7.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_7.txt new file mode 100644 index 0000000000000000000000000000000000000000..e3fc418629b66e631b433d22f7e620574c5487ba --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_7.txt @@ -0,0 +1,44 @@ +You can also write your own device_map by mapping each layer to a device. It should map all model parameters to a device, but you don't have to detail where all the submodules of a layer go if the entire layer is on the same device. 
+python +device_map = {"model.layers.1": 0, "model.layers.14": 1, "model.layers.31": "cpu", "lm_head": "disk"} +Access hf_device_map attribute to see how Accelerate split the model across devices. +py +gemma.hf_device_map +python out +{'model.embed_tokens': 0, + 'model.layers.0': 0, + 'model.layers.1': 0, + 'model.layers.2': 0, + 'model.layers.3': 0, + 'model.layers.4': 0, + 'model.layers.5': 0, + 'model.layers.6': 0, + 'model.layers.7': 0, + 'model.layers.8': 0, + 'model.layers.9': 0, + 'model.layers.10': 0, + 'model.layers.11': 0, + 'model.layers.12': 0, + 'model.layers.13': 0, + 'model.layers.14': 'cpu', + 'model.layers.15': 'cpu', + 'model.layers.16': 'cpu', + 'model.layers.17': 'cpu', + 'model.layers.18': 'cpu', + 'model.layers.19': 'cpu', + 'model.layers.20': 'cpu', + 'model.layers.21': 'cpu', + 'model.layers.22': 'cpu', + 'model.layers.23': 'cpu', + 'model.layers.24': 'cpu', + 'model.layers.25': 'cpu', + 'model.layers.26': 'cpu', + 'model.layers.27': 'cpu', + 'model.layers.28': 'cpu', + 'model.layers.29': 'cpu', + 'model.layers.30': 'cpu', + 'model.layers.31': 'cpu', + 'model.norm': 'cpu', + 'lm_head': 'cpu'} +Model data type +PyTorch model weights are normally instantiated as torch.float32 and it can be an issue if you try to load a model as a different data type. For example, you'd need twice as much memory to load the weights in torch.float32 and then again to load them in your desired data type, like torch.float16. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_8.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_8.txt new file mode 100644 index 0000000000000000000000000000000000000000..29f3efa3d80726454e78423a742942bae7126c59 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_big_models.txt_chunk_8.txt @@ -0,0 +1,18 @@ +[!WARNING] +Due to how PyTorch is designed, the torch_dtype parameter only supports floating data types. + +To avoid wasting memory like this, explicitly set the torch_dtype parameter to the desired data type or set torch_dtype="auto" to load the weights with the most optimal memory pattern (the data type is automatically derived from the model weights). + +from transformers import AutoModelForCausalLM +gemma = AutoModelForCausalLM.from_pretrained("google/gemma-7b", torch_dtype=torch.float16) + +from transformers import AutoModelForCausalLM +gemma = AutoModelForCausalLM.from_pretrained("google/gemma-7b", torch_dtype="auto") + +You can also set the data type to use for models instantiated from scratch. +thon +import torch +from transformers import AutoConfig, AutoModel +my_config = AutoConfig.from_pretrained("google/gemma-2b", torch_dtype=torch.float16) +model = AutoModel.from_config(my_config) +``` \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..97200a2ce40d36ee5737507c60d9be1f65d69efc --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_0.txt @@ -0,0 +1,11 @@ +Templates for Chat Models +Introduction +An increasingly common use case for LLMs is chat. In a chat context, rather than continuing a single string +of text (as is the case with a standard language model), the model instead continues a conversation that consists +of one or more messages, each of which includes a role, like "user" or "assistant", as well as message text. 
+Much like tokenization, different models expect very different input formats for chat. This is the reason we added +chat templates as a feature. Chat templates are part of the tokenizer. They specify how to convert conversations, +represented as lists of messages, into a single tokenizable string in the format that the model expects. +Let's make this concrete with a quick example using the BlenderBot model. BlenderBot has an extremely simple default +template, which mostly just adds whitespace between rounds of dialogue: +thon \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..c2e28be1c1686721bfabb8d7f693f94a358f991b --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_1.txt @@ -0,0 +1,14 @@ +from transformers import AutoTokenizer +tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot-400M-distill") +chat = [ + {"role": "user", "content": "Hello, how are you?"}, + {"role": "assistant", "content": "I'm doing great. How can I help you today?"}, + {"role": "user", "content": "I'd like to show off how chat templating works!"}, + ] +tokenizer.apply_chat_template(chat, tokenize=False) +" Hello, how are you? I'm doing great. How can I help you today? I'd like to show off how chat templating works!" + +Notice how the entire chat is condensed into a single string. If we use tokenize=True, which is the default setting, +that string will also be tokenized for us. To see a more complex template in action, though, let's use the +mistralai/Mistral-7B-Instruct-v0.1 model. +thon \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_10.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_10.txt new file mode 100644 index 0000000000000000000000000000000000000000..2b1d2a1d52e15189ab12354f3f1858a5564d8677 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_10.txt @@ -0,0 +1,24 @@ +Next, let's define a list of tools: +thon +def get_current_temperature(location: str, unit: str) -> float: + """ + Get the current temperature at a location. +Args: + location: The location to get the temperature for, in the format "City, Country" + unit: The unit to return the temperature in. (choices: ["celsius", "fahrenheit"]) +Returns: + The current temperature at the specified location in the specified units, as a float. +""" +return 22. # A real function should probably actually get the temperature! + +def get_current_wind_speed(location: str) -> float: + """ + Get the current wind speed in km/h at a given location. +Args: + location: The location to get the temperature for, in the format "City, Country" +Returns: + The current wind speed at the given location in km/h, as a float. +""" +return 6. # A real function should probably actually get the wind speed! 
+ +tools = [get_current_temperature, get_current_wind_speed] \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_11.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_11.txt new file mode 100644 index 0000000000000000000000000000000000000000..9c8e9e63930d53b1e5263a1558b6e0b494f30693 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_11.txt @@ -0,0 +1,44 @@ +Now, let's set up a conversation for our bot: +python +messages = [ + {"role": "system", "content": "You are a bot that responds to weather queries. You should reply with the unit used in the queried location."}, + {"role": "user", "content": "Hey, what's the temperature in Paris right now?"} +] +Now, let's apply the chat template and generate a response: +python +inputs = tokenizer.apply_chat_template(messages, chat_template="tool_use", tools=tools, add_generation_prompt=True, return_dict=True, return_tensors="pt") +inputs = {k: v.to(model.device) for k, v in inputs.items()} +out = model.generate(**inputs, max_new_tokens=128) +print(tokenizer.decode(out[0][len(inputs["input_ids"][0]):])) +And we get: +text + +{"arguments": {"location": "Paris, France", "unit": "celsius"}, "name": "get_current_temperature"} +<|im_end|> +The model has called the function with valid arguments, in the format requested by the function docstring. It has +inferred that we're most likely referring to the Paris in France, and it remembered that, as the home of SI units, +the temperature in France should certainly be displayed in Celsius. +Let's append the model's tool call to the conversation. Note that we generate a random tool_call_id here. These IDs +are not used by all models, but they allow models to issue multiple tool calls at once and keep track of which response +corresponds to which call. You can generate them any way you like, but they should be unique within each chat. +python +tool_call_id = "vAHdf3" # Random ID, should be unique for each tool call +tool_call = {"name": "get_current_temperature", "arguments": {"location": "Paris, France", "unit": "celsius"}} +messages.append({"role": "assistant", "tool_calls": [{"id": tool_call_id, "type": "function", "function": tool_call}]}) +Now that we've added the tool call to the conversation, we can call the function and append the result to the +conversation. Since we're just using a dummy function for this example that always returns 22.0, we can just append +that result directly. Again, note the tool_call_id - this should match the ID used in the tool call above. +python +messages.append({"role": "tool", "tool_call_id": tool_call_id, "name": "get_current_temperature", "content": "22.0"}) +Finally, let's let the assistant read the function outputs and continue chatting with the user: +python +inputs = tokenizer.apply_chat_template(messages, chat_template="tool_use", tools=tools, add_generation_prompt=True, return_dict=True, return_tensors="pt") +inputs = {k: v.to(model.device) for k, v in inputs.items()} +out = model.generate(**inputs, max_new_tokens=128) +print(tokenizer.decode(out[0][len(inputs["input_ids"][0]):])) +And we get: +text +The current temperature in Paris, France is 22.0 ° Celsius.<|im_end|> +Although this was a simple demo with dummy tools and a single call, the same technique works with +multiple real tools and longer conversations. 
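As an aside (this helper is not part of the original walkthrough, just an illustrative sketch), the bookkeeping above - generate an ID, append the assistant's tool call, run the matching Python function, append the result - can be wrapped in a small utility. It assumes the messages list and the tools list (get_current_temperature, get_current_wind_speed) defined earlier; the name run_tool_call and the uuid-based ID format are arbitrary choices, not a transformers API.
python
import uuid

def run_tool_call(tool_call: dict, tools: list, messages: list) -> None:
    """Append a parsed assistant tool call and its result to the conversation."""
    tool_call_id = uuid.uuid4().hex[:9]  # any scheme works, as long as IDs are unique within the chat
    messages.append({
        "role": "assistant",
        "tool_calls": [{"id": tool_call_id, "type": "function", "function": tool_call}],
    })
    registry = {fn.__name__: fn for fn in tools}  # look the function up by the name the model used
    result = registry[tool_call["name"]](**tool_call["arguments"])
    messages.append({
        "role": "tool",
        "tool_call_id": tool_call_id,
        "name": tool_call["name"],
        "content": str(result),
    })

# For example, with the conversation above:
# run_tool_call({"name": "get_current_temperature",
#                "arguments": {"location": "Paris, France", "unit": "celsius"}},
#               tools, messages)
The flow is the same as in the step-by-step example: after appending the tool response, call apply_chat_template again and generate so the assistant can read the result.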
This can be a powerful way to extend the capabilities of conversational +agents with real-time information, computational tools like calculators, or access to large databases. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_12.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_12.txt new file mode 100644 index 0000000000000000000000000000000000000000..7de549f0818318a1c72481f0425b44b791849302 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_12.txt @@ -0,0 +1,5 @@ +Not all of the tool-calling features shown above are used by all models. Some use tool call IDs, others simply use the function name and +match tool calls to results using the ordering, and there are several models that use neither and only issue one tool +call at a time to avoid confusion. If you want your code to be compatible across as many models as possible, we +recommend structuring your tools calls like we've shown here, and returning tool results in the order that +they were issued by the model. The chat templates on each model should handle the rest. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_13.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_13.txt new file mode 100644 index 0000000000000000000000000000000000000000..124b34f759aa3c290b5009463d7a05a71315c33c --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_13.txt @@ -0,0 +1,21 @@ +Understanding tool schemas +Each function you pass to the tools argument of apply_chat_template is converted into a +JSON schema. These schemas +are then passed to the model chat template. In other words, tool-use models do not see your functions directly, and they +never see the actual code inside them. What they care about is the function definitions and the arguments they +need to pass to them - they care about what the tools do and how to use them, not how they work! It is up to you +to read their outputs, detect if they have requested to use a tool, pass their arguments to the tool function, and +return the response in the chat. +Generating JSON schemas to pass to the template should be automatic and invisible as long as your functions +follow the specification above, but if you encounter problems, or you simply want more control over the conversion, +you can handle the conversion manually. Here is an example of a manual schema conversion. 
+thon +from transformers.utils import get_json_schema +def multiply(a: float, b: float): + """ + A function that multiplies two numbers +Args: + a: The first number to multiply + b: The second number to multiply +""" +return a * b \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_14.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_14.txt new file mode 100644 index 0000000000000000000000000000000000000000..4b2bb9d5581dae4ae6cba660bb5131ad5ea1e25b --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_14.txt @@ -0,0 +1,2 @@ +schema = get_json_schema(multiply) +print(schema) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_15.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_15.txt new file mode 100644 index 0000000000000000000000000000000000000000..6b471c1cd1362c2f3db9336b8bea09bc327c8671 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_15.txt @@ -0,0 +1,68 @@ +This will yield: +json +{ + "type": "function", + "function": { + "name": "multiply", + "description": "A function that multiplies two numbers", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "number", + "description": "The first number to multiply" + }, + "b": { + "type": "number", + "description": "The second number to multiply" + } + }, + "required": ["a", "b"] + } + } +} +If you wish, you can edit these schemas, or even write them from scratch yourself without using get_json_schema at +all. JSON schemas can be passed directly to the tools argument of +apply_chat_template - this gives you a lot of power to define precise schemas for more complex functions. Be careful, +though - the more complex your schemas, the more likely the model is to get confused when dealing with them! We +recommend simple function signatures where possible, keeping arguments (and especially complex, nested arguments) +to a minimum. +Here is an example of defining schemas by hand, and passing them directly to apply_chat_template: +thon +A simple function that takes no arguments +current_time = { + "type": "function", + "function": { + "name": "current_time", + "description": "Get the current local time as a string.", + "parameters": { + 'type': 'object', + 'properties': {} + } + } +} +A more complete function that takes two numerical arguments +multiply = { + 'type': 'function', + 'function': { + 'name': 'multiply', + 'description': 'A function that multiplies two numbers', + 'parameters': { + 'type': 'object', + 'properties': { + 'a': { + 'type': 'number', + 'description': 'The first number to multiply' + }, + 'b': { + 'type': 'number', 'description': 'The second number to multiply' + } + }, + 'required': ['a', 'b'] + } + } +} +model_input = tokenizer.apply_chat_template( + messages, + tools = [current_time, multiply] +) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_16.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_16.txt new file mode 100644 index 0000000000000000000000000000000000000000..d88de58e26afacc3372ebfadd0f735b47e9acf5c --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_16.txt @@ -0,0 +1,21 @@ +Advanced: Retrieval-augmented generation +"Retrieval-augmented generation" or "RAG" LLMs can search a corpus of documents for information before responding +to a query. 
This allows models to vastly expand their knowledge base beyond their limited context size. Our +recommendation for RAG models is that their template +should accept a documents argument. This should be a list of documents, where each "document" +is a single dict with title and contents keys, both of which are strings. Because this format is much simpler +than the JSON schemas used for tools, no helper functions are necessary. +Here's an example of a RAG template in action: +thon +document1 = { + "title": "The Moon: Our Age-Old Foe", + "contents": "Man has always dreamed of destroying the moon. In this essay, I shall" +} +document2 = { + "title": "The Sun: Our Age-Old Friend", + "contents": "Although often underappreciated, the sun provides several notable benefits" +} +model_input = tokenizer.apply_chat_template( + messages, + documents=[document1, document2] +) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_17.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_17.txt new file mode 100644 index 0000000000000000000000000000000000000000..0636a4a074e236925e43f6801142691854af0be1 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_17.txt @@ -0,0 +1,9 @@ +Advanced: How do chat templates work? +The chat template for a model is stored on the tokenizer.chat_template attribute. If no chat template is set, the +default template for that model class is used instead. Let's take a look at the template for BlenderBot: +thon + +from transformers import AutoTokenizer +tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot-400M-distill") +tokenizer.default_chat_template +"{% for message in messages %}{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}{{ message['content'] }}{% if not loop.last %}{{ ' ' }}{% endif %}{% endfor %}{{ eos_token }}" \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_18.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_18.txt new file mode 100644 index 0000000000000000000000000000000000000000..e59d9a3ec12bc91473aad53f385ddf702b7a27c3 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_18.txt @@ -0,0 +1,70 @@ +That's kind of intimidating. Let's clean it up a little to make it more readable. In the process, though, we also make +sure that the newlines and indentation we add don't end up being included in the template output - see the tip on +trimming whitespace below! +{%- for message in messages %} + {%- if message['role'] == 'user' %} + {{- ' ' }} + {%- endif %} + {{- message['content'] }} + {%- if not loop.last %} + {{- ' ' }} + {%- endif %} +{%- endfor %} +{{- eos_token }} +If you've never seen one of these before, this is a Jinja template. +Jinja is a templating language that allows you to write simple code that generates text. In many ways, the code and +syntax resembles Python. In pure Python, this template would look something like this: +python +for idx, message in enumerate(messages): + if message['role'] == 'user': + print(' ') + print(message['content']) + if not idx == len(messages) - 1: # Check for the last message in the conversation + print(' ') +print(eos_token) +Effectively, the template does three things: +1. For each message, if the message is a user message, add a blank space before it, otherwise print nothing. +2. Add the message content +3. If the message is not the last message, add two spaces after it. 
After the final message, print the EOS token. +This is a pretty simple template - it doesn't add any control tokens, and it doesn't support "system" messages, which +are a common way to give the model directives about how it should behave in the subsequent conversation. +But Jinja gives you a lot of flexibility to do those things! Let's see a Jinja template that can format inputs +similarly to the way LLaMA formats them (note that the real LLaMA template includes handling for default system +messages and slightly different system message handling in general - don't use this one in your actual code!) +{%- for message in messages %} + {%- if message['role'] == 'user' %} + {{- bos_token + '[INST] ' + message['content'] + ' [/INST]' }} + {%- elif message['role'] == 'system' %} + {{- '<>\\n' + message['content'] + '\\n<>\\n\\n' }} + {%- elif message['role'] == 'assistant' %} + {{- ' ' + message['content'] + ' ' + eos_token }} + {%- endif %} +{%- endfor %} +Hopefully if you stare at this for a little bit you can see what this template is doing - it adds specific tokens based +on the "role" of each message, which represents who sent it. User, assistant and system messages are clearly +distinguishable to the model because of the tokens they're wrapped in. +Advanced: Adding and editing chat templates +How do I create a chat template? +Simple, just write a jinja template and set tokenizer.chat_template. You may find it easier to start with an +existing template from another model and simply edit it for your needs! For example, we could take the LLaMA template +above and add "[ASST]" and "[/ASST]" to assistant messages: +{%- for message in messages %} + {%- if message['role'] == 'user' %} + {{- bos_token + '[INST] ' + message['content'].strip() + ' [/INST]' }} + {%- elif message['role'] == 'system' %} + {{- '<>\\n' + message['content'].strip() + '\\n<>\\n\\n' }} + {%- elif message['role'] == 'assistant' %} + {{- '[ASST] ' + message['content'] + ' [/ASST]' + eos_token }} + {%- endif %} +{%- endfor %} +Now, simply set the tokenizer.chat_template attribute. Next time you use [~PreTrainedTokenizer.apply_chat_template], it will +use your new template! This attribute will be saved in the tokenizer_config.json file, so you can use +[~utils.PushToHubMixin.push_to_hub] to upload your new template to the Hub and make sure everyone's using the right +template for your model! +python +template = tokenizer.chat_template +template = template.replace("SYS", "SYSTEM") # Change the system token +tokenizer.chat_template = template # Set the new template +tokenizer.push_to_hub("model_name") # Upload your new template to the Hub! +The method [~PreTrainedTokenizer.apply_chat_template] which uses your chat template is called by the [TextGenerationPipeline] class, so +once you set the correct chat template, your model will automatically become compatible with [TextGenerationPipeline]. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_19.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_19.txt new file mode 100644 index 0000000000000000000000000000000000000000..c21f40e2a23592c20543c0d37e96e36645d87bb8 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_19.txt @@ -0,0 +1,5 @@ +If you're fine-tuning a model for chat, in addition to setting a chat template, you should probably add any new chat +control tokens as special tokens in the tokenizer. 
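For instance, with a ChatML-style template this might look like the following sketch (illustrative only, not part of the original guide - the token strings are examples, and tokenizer and model are assumed to be the tokenizer and model you are fine-tuning):
python
# Register the chat control tokens so the tokenizer treats them as single, unsplittable tokens.
num_added = tokenizer.add_special_tokens(
    {"additional_special_tokens": ["<|im_start|>", "<|im_end|>"], "eos_token": "<|im_end|>"}
)
if num_added > 0:
    model.resize_token_embeddings(len(tokenizer))  # make room for any newly added tokens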
Special tokens are never split, +ensuring that your control tokens are always handled as single tokens rather than being tokenized in pieces. You +should also set the tokenizer's eos_token attribute to the token that marks the end of assistant generations in your +template. This will ensure that text generation tools can correctly figure out when to stop generating text. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..f7e2bc6b440a7c183de10ca79aee119175ffe080 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_2.txt @@ -0,0 +1,9 @@ +from transformers import AutoTokenizer +tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1") +chat = [ + {"role": "user", "content": "Hello, how are you?"}, + {"role": "assistant", "content": "I'm doing great. How can I help you today?"}, + {"role": "user", "content": "I'd like to show off how chat templating works!"}, + ] +tokenizer.apply_chat_template(chat, tokenize=False) +"[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today? [INST] I'd like to show off how chat templating works! [/INST]" \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_20.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_20.txt new file mode 100644 index 0000000000000000000000000000000000000000..d0ecc78484e0b51206abe84828da3c868d8d80e0 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_20.txt @@ -0,0 +1,121 @@ +Why do some models have multiple templates? +Some models use different templates for different use cases. For example, they might use one template for normal chat +and another for tool-use, or retrieval-augmented generation. In these cases, tokenizer.chat_template is a dictionary. +This can cause some confusion, and where possible, we recommend using a single template for all use-cases. You can use +Jinja statements like if tools is defined and {% macro %} definitions to easily wrap multiple code paths in a +single template. +When a tokenizer has multiple templates, tokenizer.chat_template will be a dict, where each key is the name +of a template. The apply_chat_template method has special handling for certain template names: Specifically, it will +look for a template named default in most cases, and will raise an error if it can't find one. However, if a template +named tool_use exists when the user has passed a tools argument, it will use that instead. To access templates +with other names, pass the name of the template you want to the chat_template argument of +apply_chat_template(). +We find that this can be a bit confusing for users, though - so if you're writing a template yourself, we recommend +trying to put it all in a single template where possible! +What are "default" templates? +Before the introduction of chat templates, chat handling was hardcoded at the model class level. For backwards +compatibility, we have retained this class-specific handling as default templates, also set at the class level. If a +model does not have a chat template set, but there is a default template for its model class, the TextGenerationPipeline +class and methods like apply_chat_template will use the class template instead. 
You can find out what the default +template for your tokenizer is by checking the tokenizer.default_chat_template attribute. +This is something we do purely for backward compatibility reasons, to avoid breaking any existing workflows. Even when +the class template is appropriate for your model, we strongly recommend overriding the default template by +setting the chat_template attribute explicitly to make it clear to users that your model has been correctly configured +for chat. +Now that actual chat templates have been adopted more widely, default templates have been deprecated and will be +removed in a future release. We strongly recommend setting the chat_template attribute for any tokenizers that +still depend on them! +What template should I use? +When setting the template for a model that's already been trained for chat, you should ensure that the template +exactly matches the message formatting that the model saw during training, or else you will probably experience +performance degradation. This is true even if you're training the model further - you will probably get the best +performance if you keep the chat tokens constant. This is very analogous to tokenization - you generally get the +best performance for inference or fine-tuning when you precisely match the tokenization used during training. +If you're training a model from scratch, or fine-tuning a base language model for chat, on the other hand, +you have a lot of freedom to choose an appropriate template! LLMs are smart enough to learn to handle lots of different +input formats. One popular choice is the ChatML format, and this is a good, flexible choice for many use-cases. +It looks like this: +{%- for message in messages %} + {{- '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n' }} +{%- endfor %} +If you like this one, here it is in one-liner form, ready to copy into your code. The one-liner also includes +handy support for generation prompts, but note that it doesn't add BOS or EOS tokens! +If your model expects those, they won't be added automatically by apply_chat_template - in other words, the +text will be tokenized with add_special_tokens=False. This is to avoid potential conflicts between the template and +the add_special_tokens logic. If your model expects special tokens, make sure to add them to the template! +python +tokenizer.chat_template = "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}" +This template wraps each message in <|im_start|> and <|im_end|> tokens, and simply writes the role as a string, which +allows for flexibility in the roles you train with. The output looks like this: +text +<|im_start|>system +You are a helpful chatbot that will do its best not to say anything so stupid that people tweet about it.<|im_end|> +<|im_start|>user +How are you?<|im_end|> +<|im_start|>assistant +I'm doing great!<|im_end|> +The "user", "system" and "assistant" roles are the standard for chat, and we recommend using them when it makes sense, +particularly if you want your model to operate well with [TextGenerationPipeline]. However, you are not limited +to these roles - templating is extremely flexible, and any string can be a role. +I want to add some chat templates! How should I get started? 
+If you have any chat models, you should set their tokenizer.chat_template attribute and test it using +[~PreTrainedTokenizer.apply_chat_template], then push the updated tokenizer to the Hub. This applies even if you're +not the model owner - if you're using a model with an empty chat template, or one that's still using the default class +template, please open a pull request to the model repository so that this attribute can be set properly! +Once the attribute is set, that's it, you're done! tokenizer.apply_chat_template will now work correctly for that +model, which means it is also automatically supported in places like TextGenerationPipeline! +By ensuring that models have this attribute, we can make sure that the whole community gets to use the full power of +open-source models. Formatting mismatches have been haunting the field and silently harming performance for too long - +it's time to put an end to them! +Advanced: Template writing tips +If you're unfamiliar with Jinja, we generally find that the easiest way to write a chat template is to first +write a short Python script that formats messages the way you want, and then convert that script into a template. +Remember that the template handler will receive the conversation history as a variable called messages. +You will be able to access messages in your template just like you can in Python, which means you can loop over +it with {% for message in messages %} or access individual messages with {{ messages[0] }}, for example. +You can also use the following tips to convert your code to Jinja: +Trimming whitespace +By default, Jinja will print any whitespace that comes before or after a block. This can be a problem for chat +templates, which generally want to be very precise with whitespace! To avoid this, we strongly recommend writing +your templates like this: +{%- for message in messages %} + {{- message['role'] + message['content'] }} +{%- endfor %} +rather than like this: +{% for message in messages %} + {{ message['role'] + message['content'] }} +{% endfor %} +Adding - will strip any whitespace that comes before the block. The second example looks innocent, but the newline +and indentation may end up being included in the output, which is probably not what you want! +For loops +For loops in Jinja look like this: +{%- for message in messages %} + {{- message['content'] }} +{%- endfor %} +Note that whatever's inside the {{ expression block }} will be printed to the output. You can use operators like ++ to combine strings inside expression blocks. +If statements +If statements in Jinja look like this: +{%- if message['role'] == 'user' %} + {{- message['content'] }} +{%- endif %} +Note how where Python uses whitespace to mark the beginnings and ends of for and if blocks, Jinja requires you +to explicitly end them with {% endfor %} and {% endif %}. +Special variables +Inside your template, you will have access to the list of messages, but you can also access several other special +variables. These include special tokens like bos_token and eos_token, as well as the add_generation_prompt +variable that we discussed above. You can also use the loop variable to access information about the current loop +iteration, for example using {% if loop.last %} to check if the current message is the last message in the +conversation. 
Here's an example that puts these ideas together to add a generation prompt at the end of the +conversation if add_generation_prompt is True: +{%- if loop.last and add_generation_prompt %} + {{- bos_token + 'Assistant:\n' }} +{%- endif %} +Compatibility with non-Python Jinja +There are multiple implementations of Jinja in various languages. They generally have the same syntax, +but a key difference is that when you're writing a template in Python you can use Python methods, such as +.lower() on strings or .items() on dicts. This will break if someone tries to use your template on a non-Python +implementation of Jinja. Non-Python implementations are particularly common in deployment environments, where JS +and Rust are very popular. +Don't panic, though! There are a few easy changes you can make to your templates to ensure they're compatible across +all implementations of Jinja: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_21.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_21.txt new file mode 100644 index 0000000000000000000000000000000000000000..153602130528a8571bbf5e10c45ff281652ecc95 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_21.txt @@ -0,0 +1,7 @@ +Replace Python methods with Jinja filters. These usually have the same name, for example string.lower() becomes + string|lower, and dict.items() becomes dict|items. One notable change is that string.strip() becomes string|trim. + See the list of built-in filters + in the Jinja documentation for more. +Replace True, False and None, which are Python-specific, with true, false and none. +Directly rendering a dict or list may give different results in other implementations (for example, string entries + might change from single-quoted to double-quoted). Adding the tojson filter can help to ensure consistency here. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..38f47a8a469cba52da04f426ccfe298f931a1965 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_3.txt @@ -0,0 +1,28 @@ +Note that this time, the tokenizer has added the control tokens [INST] and [/INST] to indicate the start and end of +user messages (but not assistant messages!). Mistral-instruct was trained with these tokens, but BlenderBot was not. +How do I use chat templates? +As you can see in the example above, chat templates are easy to use. Simply build a list of messages, with role +and content keys, and then pass it to the [~PreTrainedTokenizer.apply_chat_template] method. Once you do that, +you'll get output that's ready to go! When using chat templates as input for model generation, it's also a good idea +to use add_generation_prompt=True to add a generation prompt. 
+Here's an example of preparing input for model.generate(), using the Zephyr assistant model: +thon +from transformers import AutoModelForCausalLM, AutoTokenizer +checkpoint = "HuggingFaceH4/zephyr-7b-beta" +tokenizer = AutoTokenizer.from_pretrained(checkpoint) +model = AutoModelForCausalLM.from_pretrained(checkpoint) # You may want to use bfloat16 and/or move to GPU here +messages = [ + { + "role": "system", + "content": "You are a friendly chatbot who always responds in the style of a pirate", + }, + {"role": "user", "content": "How many helicopters can a human eat in one sitting?"}, + ] +tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt") +print(tokenizer.decode(tokenized_chat[0])) +This will yield a string in the input format that Zephyr expects.text +<|system|> +You are a friendly chatbot who always responds in the style of a pirate +<|user|> +How many helicopters can a human eat in one sitting? +<|assistant|> \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..79a96b3282a97ea2603d18f27f9c17e51cd66fdf --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_4.txt @@ -0,0 +1,29 @@ +Now that our input is formatted correctly for Zephyr, we can use the model to generate a response to the user's question: +python +outputs = model.generate(tokenized_chat, max_new_tokens=128) +print(tokenizer.decode(outputs[0])) +This will yield: +text +<|system|> +You are a friendly chatbot who always responds in the style of a pirate +<|user|> +How many helicopters can a human eat in one sitting? +<|assistant|> +Matey, I'm afraid I must inform ye that humans cannot eat helicopters. Helicopters are not food, they are flying machines. Food is meant to be eaten, like a hearty plate o' grog, a savory bowl o' stew, or a delicious loaf o' bread. But helicopters, they be for transportin' and movin' around, not for eatin'. So, I'd say none, me hearties. None at all. +Arr, 'twas easy after all! +Is there an automated pipeline for chat? +Yes, there is! Our text generation pipelines support chat inputs, which makes it easy to use chat models. In the past, +we used to use a dedicated "ConversationalPipeline" class, but this has now been deprecated and its functionality +has been merged into the [TextGenerationPipeline]. Let's try the Zephyr example again, but this time using +a pipeline: +thon +from transformers import pipeline +pipe = pipeline("text-generation", "HuggingFaceH4/zephyr-7b-beta") +messages = [ + { + "role": "system", + "content": "You are a friendly chatbot who always responds in the style of a pirate", + }, + {"role": "user", "content": "How many helicopters can a human eat in one sitting?"}, +] +print(pipe(messages, max_new_tokens=128)[0]['generated_text'][-1]) # Print the assistant's response \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..8a04999f361f9ea2ff629b91403459ba86870f0a --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_5.txt @@ -0,0 +1,67 @@ +text +{'role': 'assistant', 'content': "Matey, I'm afraid I must inform ye that humans cannot eat helicopters. 
Helicopters are not food, they are flying machines. Food is meant to be eaten, like a hearty plate o' grog, a savory bowl o' stew, or a delicious loaf o' bread. But helicopters, they be for transportin' and movin' around, not for eatin'. So, I'd say none, me hearties. None at all."} +The pipeline will take care of all the details of tokenization and calling apply_chat_template for you - +once the model has a chat template, all you need to do is initialize the pipeline and pass it the list of messages! +What are "generation prompts"? +You may have noticed that the apply_chat_template method has an add_generation_prompt argument. This argument tells +the template to add tokens that indicate the start of a bot response. For example, consider the following chat: +python +messages = [ + {"role": "user", "content": "Hi there!"}, + {"role": "assistant", "content": "Nice to meet you!"}, + {"role": "user", "content": "Can I ask a question?"} +] +Here's what this will look like without a generation prompt, using the ChatML template we saw in the Zephyr example: +python +tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False) +"""<|im_start|>user +Hi there!<|im_end|> +<|im_start|>assistant +Nice to meet you!<|im_end|> +<|im_start|>user +Can I ask a question?<|im_end|> +""" +And here's what it looks like with a generation prompt: +python +tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) +"""<|im_start|>user +Hi there!<|im_end|> +<|im_start|>assistant +Nice to meet you!<|im_end|> +<|im_start|>user +Can I ask a question?<|im_end|> +<|im_start|>assistant +""" +Note that this time, we've added the tokens that indicate the start of a bot response. This ensures that when the model +generates text it will write a bot response instead of doing something unexpected, like continuing the user's +message. Remember, chat models are still just language models - they're trained to continue text, and chat is just a +special kind of text to them! You need to guide them with appropriate control tokens, so they know what they're +supposed to be doing. +Not all models require generation prompts. Some models, like BlenderBot and LLaMA, don't have any +special tokens before bot responses. In these cases, the add_generation_prompt argument will have no effect. The exact +effect that add_generation_prompt has will depend on the template being used. +Can I use chat templates in training? +Yes! We recommend that you apply the chat template as a preprocessing step for your dataset. After this, you +can simply continue like any other language model training task. When training, you should usually set +add_generation_prompt=False, because the added tokens to prompt an assistant response will not be helpful during +training. Let's see an example: +thon +from transformers import AutoTokenizer +from datasets import Dataset +tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta") +chat1 = [ + {"role": "user", "content": "Which is bigger, the moon or the sun?"}, + {"role": "assistant", "content": "The sun."} +] +chat2 = [ + {"role": "user", "content": "Which is bigger, a virus or a bacterium?"}, + {"role": "assistant", "content": "A bacterium."} +] +dataset = Dataset.from_dict({"chat": [chat1, chat2]}) +dataset = dataset.map(lambda x: {"formatted_chat": tokenizer.apply_chat_template(x["chat"], tokenize=False, add_generation_prompt=False)}) +print(dataset['formatted_chat'][0]) +And we get:text +<|user|> +Which is bigger, the moon or the sun? 
+
+<|assistant|>
+The sun. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..66ee423b454237d02ffc88b39a920938c1ac76d6 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_6.txt @@ -0,0 +1,27 @@ +From here, just continue training like you would with a standard language modelling task, using the formatted_chat column.
+Advanced: Extra inputs to chat templates
+The only argument that apply_chat_template requires is messages. However, you can pass any keyword
+argument to apply_chat_template and it will be accessible inside the template. This gives you a lot of freedom to use
+chat templates for many things. There are no restrictions on the names or the format of these arguments - you can pass
+strings, lists, dicts or whatever else you want.
+That said, there are some common use-cases for these extra arguments,
+such as passing tools for function calling, or documents for retrieval-augmented generation. In these common cases,
+we have some opinionated recommendations about what the names and formats of these arguments should be, which are
+described in the sections below. We encourage model authors to make their chat templates compatible with this format,
+to make it easy to transfer tool-calling code between models.
+Advanced: Tool use / function calling
+"Tool use" LLMs can choose to call functions as external tools before generating an answer. When passing tools
+to a tool-use model, you can simply pass a list of functions to the tools argument:
+python
+import datetime
+def current_time():
+    """Get the current local time as a string."""
+    return str(datetime.datetime.now())  # with `import datetime`, the class is datetime.datetime
+def multiply(a: float, b: float):
+    """
+    A function that multiplies two numbers
+Args:
+    a: The first number to multiply
+    b: The second number to multiply
+"""
+    return a * b \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_7.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_7.txt new file mode 100644 index 0000000000000000000000000000000000000000..03d9a570ace7ce44b2664d706e79c9bf4fa02cec --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_7.txt @@ -0,0 +1,17 @@ +tools = [current_time, multiply]
+model_input = tokenizer.apply_chat_template(
+    messages,
+    tools=tools
+)
+
+In order for this to work correctly, you should write your functions in the format above, so that they can be parsed
+correctly as tools. Specifically, you should follow these rules:
+
+The function should have a descriptive name
+Every argument must have a type hint
+The function must have a docstring in the standard Google style (in other words, an initial function description
+  followed by an Args: block that describes the arguments, unless the function does not have any arguments).
+Do not include types in the Args: block. In other words, write a: The first number to multiply, not
+  a (int): The first number to multiply. Type hints should go in the function header instead.
+The function can have a return type and a Returns: block in the docstring. However, these are optional
+  because most tool-use models ignore them.
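If you want to catch violations of these rules early, a quick sanity check along these lines can help. The validate_tool helper below is a hypothetical illustration (not a transformers API); it only inspects the signature and docstring, which is roughly the information the schema conversion relies on:
python
import inspect

def validate_tool(fn) -> list:
    """Return a list of reasons why `fn` may not convert cleanly into a tool schema."""
    problems = []
    if not fn.__doc__:
        problems.append("missing docstring")
    elif inspect.signature(fn).parameters and "Args:" not in fn.__doc__:
        problems.append("docstring has no Args: block")
    for name, param in inspect.signature(fn).parameters.items():
        if param.annotation is inspect.Parameter.empty:
            problems.append(f"argument '{name}' has no type hint")
    return problems

print(validate_tool(multiply))  # [] for the compliant example above
A function that fails these checks may be rejected by the schema conversion or end up as an incomplete schema, so it is worth checking before you build a long conversation around a tool.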
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_8.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_8.txt new file mode 100644 index 0000000000000000000000000000000000000000..649739e1643de048045d32e6b41e58ab4c38c443 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_8.txt @@ -0,0 +1,8 @@ +Passing tool results to the model +The sample code above is enough to list the available tools for your model, but what happens if it wants to actually use +one? If that happens, you should: + +Parse the model's output to get the tool name(s) and arguments. +Add the model's tool call(s) to the conversation. +Call the corresponding function(s) with those arguments. +Add the result(s) to the conversation \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_9.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_9.txt new file mode 100644 index 0000000000000000000000000000000000000000..1a8a984250ccd632037a7d4c3c2c2c5e4eda65f3 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_chat_templating.txt_chunk_9.txt @@ -0,0 +1,13 @@ +A complete tool use example +Let's walk through a tool use example, step by step. For this example, we will use an 8B Hermes-2-Pro model, +as it is one of the highest-performing tool-use models in its size category at the time of writing. If you have the +memory, you can consider using a larger model instead like Command-R +or Mixtral-8x22B, both of which also support tool use +and offer even stronger performance. +First, let's load our model and tokenizer: +thon +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer +checkpoint = "NousResearch/Hermes-2-Pro-Llama-3-8B" +tokenizer = AutoTokenizer.from_pretrained(checkpoint, revision="pr/13") +model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch.bfloat16, device_map="auto") \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_community.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_community.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..a9c0b026adac7f4003764e19db847051a7e73152 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_community.txt_chunk_0.txt @@ -0,0 +1,60 @@ +Community +This page regroups resources around 🤗 Transformers developed by the community. +Community resources: +| Resource | Description | Author | +|:----------|:-------------|------:| +| Hugging Face Transformers Glossary Flashcards | A set of flashcards based on the Transformers Docs Glossary that has been put into a form which can be easily learned/revised using Anki an open source, cross platform app specifically designed for long term knowledge retention. See this Introductory video on how to use the flashcards. | Darigov Research | +Community notebooks: +| Notebook | Description | Author | | +|:----------|:-------------|:-------------|------:| +| Fine-tune a pre-trained Transformer to generate lyrics | How to generate lyrics in the style of your favorite artist by fine-tuning a GPT-2 model | Aleksey Korshuk | | +| Train T5 in Tensorflow 2 | How to train T5 for any task using Tensorflow 2. 
This notebook demonstrates a Question & Answer task implemented in Tensorflow 2 using SQUAD | Muhammad Harris | | +| Train T5 on TPU | How to train T5 on SQUAD with Transformers and Nlp | Suraj Patil | | +| Fine-tune T5 for Classification and Multiple Choice | How to fine-tune T5 for classification and multiple choice tasks using a text-to-text format with PyTorch Lightning | Suraj Patil | | +| Fine-tune DialoGPT on New Datasets and Languages | How to fine-tune the DialoGPT model on a new dataset for open-dialog conversational chatbots | Nathan Cooper | | +| Long Sequence Modeling with Reformer | How to train on sequences as long as 500,000 tokens with Reformer | Patrick von Platen | | +| Fine-tune BART for Summarization | How to fine-tune BART for summarization with fastai using blurr | Wayde Gilliam | | +| Fine-tune a pre-trained Transformer on anyone's tweets | How to generate tweets in the style of your favorite Twitter account by fine-tuning a GPT-2 model | Boris Dayma | | +| Optimize 🤗 Hugging Face models with Weights & Biases | A complete tutorial showcasing W&B integration with Hugging Face | Boris Dayma | | +| Pretrain Longformer | How to build a "long" version of existing pretrained models | Iz Beltagy | | +| Fine-tune Longformer for QA | How to fine-tune longformer model for QA task | Suraj Patil | | +| Evaluate Model with 🤗nlp | How to evaluate longformer on TriviaQA with nlp | Patrick von Platen | | +| Fine-tune T5 for Sentiment Span Extraction | How to fine-tune T5 for sentiment span extraction using a text-to-text format with PyTorch Lightning | Lorenzo Ampil | | +| Fine-tune DistilBert for Multiclass Classification | How to fine-tune DistilBert for multiclass classification with PyTorch | Abhishek Kumar Mishra | | +|Fine-tune BERT for Multi-label Classification|How to fine-tune BERT for multi-label classification using PyTorch|Abhishek Kumar Mishra || +|Fine-tune T5 for Summarization|How to fine-tune T5 for summarization in PyTorch and track experiments with WandB|Abhishek Kumar Mishra || +|Speed up Fine-Tuning in Transformers with Dynamic Padding / Bucketing|How to speed up fine-tuning by a factor of 2 using dynamic padding / bucketing|Michael Benesty || +|Pretrain Reformer for Masked Language Modeling| How to train a Reformer model with bi-directional self-attention layers | Patrick von Platen | | +|Expand and Fine Tune Sci-BERT| How to increase vocabulary of a pretrained SciBERT model from AllenAI on the CORD dataset and pipeline it. | Tanmay Thakur | | +|Fine Tune BlenderBotSmall for Summarization using the Trainer API| How to fine-tune BlenderBotSmall for summarization on a custom dataset, using the Trainer API. 
| Tanmay Thakur | | +|Fine-tune Electra and interpret with Integrated Gradients | How to fine-tune Electra for sentiment analysis and interpret predictions with Captum Integrated Gradients | Eliza Szczechla | | +|fine-tune a non-English GPT-2 Model with Trainer class | How to fine-tune a non-English GPT-2 Model with Trainer class | Philipp Schmid | | +|Fine-tune a DistilBERT Model for Multi Label Classification task | How to fine-tune a DistilBERT Model for Multi Label Classification task | Dhaval Taunk | | +|Fine-tune ALBERT for sentence-pair classification | How to fine-tune an ALBERT model or another BERT-based model for the sentence-pair classification task | Nadir El Manouzi | | +|Fine-tune Roberta for sentiment analysis | How to fine-tune a Roberta model for sentiment analysis | Dhaval Taunk | | +|Evaluating Question Generation Models | How accurate are the answers to questions generated by your seq2seq transformer model? | Pascal Zoleko | | +|Classify text with DistilBERT and Tensorflow | How to fine-tune DistilBERT for text classification in TensorFlow | Peter Bayerle | | +|Leverage BERT for Encoder-Decoder Summarization on CNN/Dailymail | How to warm-start a EncoderDecoderModel with a google-bert/bert-base-uncased checkpoint for summarization on CNN/Dailymail | Patrick von Platen | | +|Leverage RoBERTa for Encoder-Decoder Summarization on BBC XSum | How to warm-start a shared EncoderDecoderModel with a FacebookAI/roberta-base checkpoint for summarization on BBC/XSum | Patrick von Platen | | +|Fine-tune TAPAS on Sequential Question Answering (SQA) | How to fine-tune TapasForQuestionAnswering with a tapas-base checkpoint on the Sequential Question Answering (SQA) dataset | Niels Rogge | | +|Evaluate TAPAS on Table Fact Checking (TabFact) | How to evaluate a fine-tuned TapasForSequenceClassification with a tapas-base-finetuned-tabfact checkpoint using a combination of the 🤗 datasets and 🤗 transformers libraries | Niels Rogge | | +|Fine-tuning mBART for translation | How to fine-tune mBART using Seq2SeqTrainer for Hindi to English translation | Vasudev Gupta | | +|Fine-tune LayoutLM on FUNSD (a form understanding dataset) | How to fine-tune LayoutLMForTokenClassification on the FUNSD dataset for information extraction from scanned documents | Niels Rogge | | +|Fine-Tune DistilGPT2 and Generate Text | How to fine-tune DistilGPT2 and generate text | Aakash Tripathi | | +|Fine-Tune LED on up to 8K tokens | How to fine-tune LED on pubmed for long-range summarization | Patrick von Platen | | +|Evaluate LED on Arxiv | How to effectively evaluate LED on long-range summarization | Patrick von Platen | | +|Fine-tune LayoutLM on RVL-CDIP (a document image classification dataset) | How to fine-tune LayoutLMForSequenceClassification on the RVL-CDIP dataset for scanned document classification | Niels Rogge | | +|Wav2Vec2 CTC decoding with GPT2 adjustment | How to decode CTC sequence with language model adjustment | Eric Lam | | +|Fine-tune BART for summarization in two languages with Trainer class | How to fine-tune BART for summarization in two languages with Trainer class | Eliza Szczechla | | +|Evaluate Big Bird on Trivia QA | How to evaluate BigBird on long document question answering on Trivia QA | Patrick von Platen | | +| Create video captions using Wav2Vec2 | How to create YouTube captions from any video by transcribing the audio with Wav2Vec | Niklas Muennighoff | | +| Fine-tune the Vision Transformer on CIFAR-10 using PyTorch Lightning | How to fine-tune the Vision Transformer (ViT) on 
CIFAR-10 using HuggingFace Transformers, Datasets and PyTorch Lightning | Niels Rogge | | +| Fine-tune the Vision Transformer on CIFAR-10 using the 🤗 Trainer | How to fine-tune the Vision Transformer (ViT) on CIFAR-10 using HuggingFace Transformers, Datasets and the 🤗 Trainer | Niels Rogge | | +| Evaluate LUKE on Open Entity, an entity typing dataset | How to evaluate LukeForEntityClassification on the Open Entity dataset | Ikuya Yamada | | +| Evaluate LUKE on TACRED, a relation extraction dataset | How to evaluate LukeForEntityPairClassification on the TACRED dataset | Ikuya Yamada | | +| Evaluate LUKE on CoNLL-2003, an important NER benchmark | How to evaluate LukeForEntitySpanClassification on the CoNLL-2003 dataset | Ikuya Yamada | | +| Evaluate BigBird-Pegasus on PubMed dataset | How to evaluate BigBirdPegasusForConditionalGeneration on PubMed dataset | Vasudev Gupta | | +| Speech Emotion Classification with Wav2Vec2 | How to leverage a pretrained Wav2Vec2 model for Emotion Classification on the MEGA dataset | Mehrdad Farahani | | +| Detect objects in an image with DETR | How to use a trained DetrForObjectDetection model to detect objects in an image and visualize attention | Niels Rogge | | +| Fine-tune DETR on a custom object detection dataset | How to fine-tune DetrForObjectDetection on a custom object detection dataset | Niels Rogge | | +| Finetune T5 for Named Entity Recognition | How to fine-tune T5 on a Named Entity Recognition Task | Ogundepo Odunayo | | \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..e0001b48c01bad457e04398e08366f3e53c3c929 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_0.txt @@ -0,0 +1,17 @@ +Contribute to 🤗 Transformers +Everyone is welcome to contribute, and we value everybody's contribution. Code +contributions are not the only way to help the community. Answering questions, helping +others, and improving the documentation are also immensely valuable. +It also helps us if you spread the word! Reference the library in blog posts +about the awesome projects it made possible, shout out on Twitter every time it has +helped you, or simply â­ï¸ the repository to say thank you. +However you choose to contribute, please be mindful and respect our +code of conduct. +This guide was heavily inspired by the awesome scikit-learn guide to contributing. +Ways to contribute +There are several ways you can contribute to 🤗 Transformers: + +Fix outstanding issues with the existing code. +Submit issues related to bugs or desired new features. +Implement new models. +Contribute to the examples or to the documentation. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..141ff7294f24aeb99fda884d268e1a04e55c9177 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_1.txt @@ -0,0 +1,6 @@ +If you don't know where to start, there is a special Good First +Issue listing. It will give you a list of +open issues that are beginner-friendly and help you start contributing to open-source. The best way to do that is to open a Pull Request and link it to the issue that you'd like to work on. 
We try to give priority to opened PRs as we can easily track the progress of the fix, and if the contributor does not have time anymore, someone else can take the PR over. +For something slightly more challenging, you can also take a look at the Good Second Issue list. In general though, if you feel like you know what you're doing, go for it and we'll help you get there! 🚀 + +All contributions are equally valuable to the community. 🥰 \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_10.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_10.txt new file mode 100644 index 0000000000000000000000000000000000000000..63339f5136e4a8fcc66b84abe8735776bacfd9a9 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_10.txt @@ -0,0 +1,15 @@ +git add modified_file.py + git commit +Please remember to write good commit + messages to clearly communicate the changes you made! +To keep your copy of the code up to date with the original + repository, rebase your branch on upstream/branch before you open a pull request or if requested by a maintainer: + + git fetch upstream + git rebase upstream/main +Push your changes to your branch: + + git push -u origin a-descriptive-name-for-my-changes +If you've already opened a pull request, you'll need to force push with the --force flag. Otherwise, if the pull request hasn't been opened yet, you can just push your changes normally. + +Now you can go to your fork of the repository on GitHub and click on Pull Request to open a pull request. Make sure you tick off all the boxes on our checklist below. When you're ready, you can send your changes to the project maintainers for review. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_11.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_11.txt new file mode 100644 index 0000000000000000000000000000000000000000..f2bc0489641edcf364d55ba028a208acbba68e9d --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_11.txt @@ -0,0 +1,4 @@ +It's ok if maintainers request changes, it happens to our core contributors + too! So everyone can see the changes in the pull request, work in your local + branch and push the changes to your fork. They will automatically appear in + the pull request. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_12.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_12.txt new file mode 100644 index 0000000000000000000000000000000000000000..71ee94641826468637a973fc418c628ddcf14d80 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_12.txt @@ -0,0 +1,33 @@ +Pull request checklist +☠The pull request title should summarize your contribution. +☠If your pull request addresses an issue, please mention the issue number in the pull +request description to make sure they are linked (and people viewing the issue know you +are working on it). +☠To indicate a work in progress please prefix the title with [WIP]. These are +useful to avoid duplicated work, and to differentiate it from PRs ready to be merged. +☠Make sure existing tests pass. +☠If adding a new feature, also add tests for it. + - If you are adding a new model, make sure you use + ModelTester.all_model_classes = (MyModel, MyModelWithLMHead,) to trigger the common tests. 
+ - If you are adding new @slow tests, make sure they pass using + RUN_SLOW=1 python -m pytest tests/models/my_new_model/test_my_new_model.py. + - If you are adding a new tokenizer, write tests and make sure + RUN_SLOW=1 python -m pytest tests/models/{your_model_name}/test_tokenization_{your_model_name}.py passes. + - CircleCI does not run the slow tests, but GitHub Actions does every night! +☠All public methods must have informative docstrings (see +modeling_bert.py +for an example). +☠Due to the rapidly growing repository, don't add any images, videos and other +non-text files that'll significantly weigh down the repository. Instead, use a Hub +repository such as hf-internal-testing +to host these files and reference them by URL. We recommend placing documentation +related images in the following repository: +huggingface/documentation-images. +You can open a PR on this dataset repository and ask a Hugging Face member to merge it. +For more information about the checks run on a pull request, take a look at our Checks on a Pull Request guide. +Tests +An extensive test suite is included to test the library behavior and several examples. Library tests can be found in +the tests folder and examples tests in the +examples folder. +We like pytest and pytest-xdist because it's faster. From the root of the +repository, specify a path to a subfolder or a test file to run the test: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_13.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_13.txt new file mode 100644 index 0000000000000000000000000000000000000000..5354fd967286a073d8ae4feb895b103a2da29ec2 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_13.txt @@ -0,0 +1,11 @@ +python -m pytest -n auto --dist=loadfile -s -v ./tests/models/my_new_model +Similarly, for the examples directory, specify a path to a subfolder or test file to run the test. For example, the following command tests the text classification subfolder in the PyTorch examples directory: + +pip install -r examples/xxx/requirements.txt # only needed the first time +python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/text-classification +In fact, this is actually how our make test and make test-examples commands are implemented (not including the pip install)! +You can also specify a smaller set of tests in order to test only the feature +you're working on. +By default, slow tests are skipped but you can set the RUN_SLOW environment variable to +yes to run them. This will download many gigabytes of models so make sure you +have enough disk space, a good internet connection or a lot of patience! \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_14.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_14.txt new file mode 100644 index 0000000000000000000000000000000000000000..0a7cc5ca924ab6e1f75fbde906e54619102dcb15 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_14.txt @@ -0,0 +1,13 @@ +Remember to specify a path to a subfolder or a test file to run the test. Otherwise, you'll run all the tests in the tests or examples folder, which will take a very long time! 
+ +RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./tests/models/my_new_model +RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/text-classification +Like the slow tests, there are other environment variables available which not enabled by default during testing: +- RUN_CUSTOM_TOKENIZERS: Enables tests for custom tokenizers. +- RUN_PT_FLAX_CROSS_TESTS: Enables tests for PyTorch + Flax integration. +- RUN_PT_TF_CROSS_TESTS: Enables tests for TensorFlow + PyTorch integration. +More environment variables and additional information can be found in the testing_utils.py. +🤗 Transformers uses pytest as a test runner only. It doesn't use any +pytest-specific features in the test suite itself. +This means unittest is fully supported. Here's how to run tests with +unittest: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_15.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_15.txt new file mode 100644 index 0000000000000000000000000000000000000000..acf1ad2756b7c1e33938694ecfde288c10be7542 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_15.txt @@ -0,0 +1,16 @@ +python -m unittest discover -s tests -t . -v +python -m unittest discover -s examples -t examples -v +Style guide +For documentation strings, 🤗 Transformers follows the Google Python Style Guide. +Check our documentation writing guide +for more information. +Develop on Windows +On Windows (unless you're working in Windows Subsystem for Linux or WSL), you need to configure git to transform Windows CRLF line endings to Linux LF line endings: + +git config core.autocrlf input +One way to run the make command on Windows is with MSYS2: + +Download MSYS2, and we assume it's installed in C:\msys64. +Open the command line C:\msys64\msys2.exe (it should be available from the Start menu). +Run in the shell: pacman -Syu and install make with pacman -S make. +Add C:\msys64\usr\bin to your PATH environment variable. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_16.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_16.txt new file mode 100644 index 0000000000000000000000000000000000000000..f4a51be167cbb4891b88f598fa6c8c20aeeafab5 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_16.txt @@ -0,0 +1,11 @@ +You can now use make from any terminal (PowerShell, cmd.exe, etc.)! 🎉 +Sync a forked repository with upstream main (the Hugging Face repository) +When updating the main branch of a forked repository, please follow these steps to avoid pinging the upstream repository which adds reference notes to each upstream PR, and sends unnecessary notifications to the developers involved in these PRs. + +When possible, avoid syncing with the upstream using a branch and PR on the forked repository. Instead, merge directly into the forked main. 
+If a PR is absolutely necessary, use the following steps after checking out your branch: + + git checkout -b your-branch-for-syncing + git pull --squash --no-commit upstream main + git commit -m '' + git push --set-upstream origin your-branch-for-syncing \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..04d9e44ac6975d3a966c7fb9c3bdf4c1ac5b9254 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_2.txt @@ -0,0 +1,11 @@ +Fixing outstanding issues +If you notice an issue with the existing code and have a fix in mind, feel free to start contributing and open a Pull Request! +Submitting a bug-related issue or feature request +Do your best to follow these guidelines when submitting a bug-related issue or a feature +request. It will make it easier for us to come back to you quickly and with good +feedback. +Did you find a bug? +The 🤗 Transformers library is robust and reliable thanks to users who report the problems they encounter. +Before you report an issue, we would really appreciate it if you could make sure the bug was not +already reported (use the search bar on GitHub under Issues). Your issue should also be related to bugs in the library itself, and not your code. If you're unsure whether the bug is in your code or the library, please ask in the forum first. This helps us respond quicker to fixing issues related to the library versus general questions. +Once you've confirmed the bug hasn't already been reported, please include the following information in your issue so we can quickly resolve it: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..91ff9c5df46dbeb81702da13ea0d96f49f56de23 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_3.txt @@ -0,0 +1,19 @@ +Your OS type and version and Python, PyTorch and + TensorFlow versions when applicable. +A short, self-contained, code snippet that allows us to reproduce the bug in + less than 30s. +The full traceback if an exception is raised. +Attach any other additional information, like screenshots, you think may help. + +To get the OS and software versions automatically, run the following command: + +transformers-cli env +You can also run the same command from the root of the repository: + +python src/transformers/commands/transformers_cli.py env +Do you want a new feature? +If there is a new feature you'd like to see in 🤗 Transformers, please open an issue and describe: + +What is the motivation behind this feature? Is it related to a problem or frustration with the library? Is it a feature related to something you need for a project? Is it something you worked on and think it could benefit the community? + +Whatever it is, we'd love to hear about it! \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..66d9db7ba0a5482e6e55ca084a45ca96b0c0034d --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_4.txt @@ -0,0 +1,14 @@ +Whatever it is, we'd love to hear about it! 
+ +Describe your requested feature in as much detail as possible. The more you can tell us about it, the better we'll be able to help you. +Provide a code snippet that demonstrates the features usage. +If the feature is related to a paper, please include a link. + +If your issue is well written we're already 80% of the way there by the time you create it. +We have added templates to help you get started with your issue. +Do you want to implement a new model? +New models are constantly released and if you want to implement a new model, please provide the following information: + +A short description of the model and a link to the paper. +Link to the implementation if it is open-sourced. +Link to the model weights if they are available. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..9e825020e23e9be8dec03b02e6b1ea3ddc8188b7 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_5.txt @@ -0,0 +1,14 @@ +If you are willing to contribute the model yourself, let us know so we can help you add it to 🤗 Transformers! +We have a technical guide for how to add a model to 🤗 Transformers. +Do you want to add documentation? +We're always looking for improvements to the documentation that make it more clear and accurate. Please let us know how the documentation can be improved such as typos and any content that is missing, unclear or inaccurate. We'll be happy to make the changes or help you make a contribution if you're interested! +For more details about how to generate, build, and write the documentation, take a look at the documentation README. +Create a Pull Request +Before writing any code, we strongly advise you to search through the existing PRs or +issues to make sure nobody is already working on the same thing. If you are +unsure, it is always a good idea to open an issue to get some feedback. +You will need basic git proficiency to contribute to +🤗 Transformers. While git is not the easiest tool to use, it has the greatest +manual. Type git --help in a shell and enjoy! If you prefer books, Pro +Git is a very good reference. +You'll need Python 3.8 or above to contribute to 🤗 Transformers. Follow the steps below to start contributing: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..6f6a30d51eeb4a1bc135d8b9e7a613f47006b8e6 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_6.txt @@ -0,0 +1,16 @@ +Fork the repository by + clicking on the Fork button on the repository's page. This creates a copy of the code + under your GitHub user account. + +Clone your fork to your local disk, and add the base repository as a remote: + + git clone git@github.com:/transformers.git + cd transformers + git remote add upstream https://github.com/huggingface/transformers.git + +Create a new branch to hold your development changes: + + git checkout -b a-descriptive-name-for-my-changes +🚨 Do not work on the main branch! 
+ +Set up a development environment by running the following command in a virtual environment: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_7.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_7.txt new file mode 100644 index 0000000000000000000000000000000000000000..dab6de9e2eb511466ced3208e915dd41be0d57e8 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_7.txt @@ -0,0 +1,17 @@ +Set up a development environment by running the following command in a virtual environment: + + pip install -e ".[dev]" +If 🤗 Transformers was already installed in the virtual environment, remove + it with pip uninstall transformers before reinstalling it in editable + mode with the -e flag. +Depending on your OS, and since the number of optional dependencies of Transformers is growing, you might get a + failure with this command. If that's the case make sure to install the Deep Learning framework you are working with + (PyTorch, TensorFlow and/or Flax) then do: + + pip install -e ".[quality]" +which should be enough for most use cases. + +Develop the features in your branch. + +As you work on your code, you should make sure the test suite + passes. Run the tests impacted by your changes like this: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_8.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_8.txt new file mode 100644 index 0000000000000000000000000000000000000000..4de052f43b610860cf9b6f189dba78bb58e83fed --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_8.txt @@ -0,0 +1,19 @@ +pytest tests/.py +For more information about tests, check out the + Testing guide. +🤗 Transformers relies on black and ruff to format its source code + consistently. After you make changes, apply automatic style corrections and code verifications + that can't be automated in one go with: + + make fixup +This target is also optimized to only work with files modified by the PR you're working on. +If you prefer to run the checks one after the other, the following command applies the + style corrections: + + make style +🤗 Transformers also uses ruff and a few custom scripts to check for coding mistakes. Quality + controls are run by the CI, but you can run the same checks with: + + make quality +Finally, we have a lot of scripts to make sure we don't forget to update + some files when adding a new model. You can run these scripts with: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_9.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_9.txt new file mode 100644 index 0000000000000000000000000000000000000000..1def05106917a82fda3db915703fb74ed41475bc --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_contributing.txt_chunk_9.txt @@ -0,0 +1,14 @@ +make repo-consistency +To learn more about those checks and how to fix any issues with them, check out the + Checks on a Pull Request guide. +If you're modifying documents under the docs/source directory, make sure the documentation can still be built. This check will also run in the CI when you open a pull request. 
To run a local check + make sure you install the documentation builder: + + pip install ".[docs]" +Run the following command from the root of the repository: + + doc-builder build transformers docs/source/en --build_dir ~/tmp/test-build +This will build the documentation in the ~/tmp/test-build folder where you can inspect the generated + Markdown files with your favorite editor. You can also preview the docs on GitHub when you open a pull request. +Once you're happy with your changes, add the changed files with git add and + record your changes locally with git commit: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_conversations.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_conversations.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..9c6841b0f6cc4d1681752a35dfe3254676c5adbe --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_conversations.txt_chunk_0.txt @@ -0,0 +1,39 @@ +Chatting with Transformers +If you're reading this article, you're almost certainly aware of chat models. Chat models are conversational +AIs that you can send and receive messages with. The most famous of these is the proprietary ChatGPT, but there are +now many open-source chat models which match or even substantially exceed its performance. These models are free to +download and run on a local machine. Although the largest and most capable models require high-powered hardware +and lots of memory to run, there are smaller models that will run perfectly well on a single consumer GPU, or even +an ordinary desktop or notebook CPU. +This guide will help you get started with chat models. We'll start with a brief quickstart guide that uses a convenient, +high-level "pipeline". This is all you need if you just want to start running a chat model +immediately. After the quickstart, we'll move on to more detailed information about +what exactly chat models are, how to choose an appropriate one, and a low-level breakdown of each of the +steps involved in talking to a chat model. We'll also give some tips on optimizing the performance and memory usage +of your chat models. +Quickstart +If you have no time for details, here's the brief summary: Chat models continue chats. This means that you pass them +a conversation history, which can be as short as a single user message, and the model will continue the conversation +by adding its response. Let's see this in action. First, let's build a chat: +python +chat = [ + {"role": "system", "content": "You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986."}, + {"role": "user", "content": "Hey, can you tell me any fun things to do in New York?"} +] +Notice that in addition to the user's message, we added a system message at the start of the conversation. Not all +chat models support system messages, but when they do, they represent high-level directives about how the model +should behave in the conversation. You can use this to guide the model - whether you want short or long responses, +lighthearted or serious ones, and so on. If you want the model to do useful work instead of +practicing its improv routine, you can either omit the system message or try a terse one such as "You are a helpful and intelligent +AI assistant who responds to user queries." +Once you have a chat, the quickest way to continue it is using the [TextGenerationPipeline]. +Let's see this in action with LLaMA-3. 
Note that LLaMA-3 is a gated model, which means you will need to +apply for access and log in with your Hugging Face +account to use it. We'll also use device_map="auto", which will load the model on GPU if there's enough memory +for it, and set the dtype to torch.bfloat16 to save memory: +thon +import torch +from transformers import pipeline +pipe = pipeline("text-generation", "meta-llama/Meta-Llama-3-8B-Instruct", torch_dtype=torch.bfloat16, device_map="auto") +response = pipe(chat, max_new_tokens=512) +print(response[0]['generated_text'][-1]['content']) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_conversations.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_conversations.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..e16f1d0ccdd68dbc30ce858dbb8a436218e142a6 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_conversations.txt_chunk_1.txt @@ -0,0 +1,17 @@ +And you'll get: +```text +(sigh) Oh boy, you're asking me for advice? You're gonna need a map, pal! Alright, +alright, I'll give you the lowdown. But don't say I didn't warn you, I'm a robot, not a tour guide! +So, you wanna know what's fun to do in the Big Apple? Well, let me tell you, there's a million +things to do, but I'll give you the highlights. First off, you gotta see the sights: the Statue of +Liberty, Central Park, Times Square you know, the usual tourist traps. But if you're lookin' for +something a little more unusual, I'd recommend checkin' out the Museum of Modern Art. It's got +some wild stuff, like that Warhol guy's soup cans and all that jazz. +And if you're feelin' adventurous, take a walk across the Brooklyn Bridge. Just watch out for +those pesky pigeons, they're like little feathered thieves! (laughs) Get it? Thieves? Ah, never mind. +Now, if you're lookin' for some serious fun, hit up the comedy clubs in Greenwich Village. You might +even catch a glimpse of some up-and-coming comedians or a bunch of wannabes tryin' to make it big. (winks) +And finally, if you're feelin' like a real New Yorker, grab a slice of pizza from one of the many amazing +pizzerias around the city. Just don't try to order a "robot-sized" slice, trust me, it won't end well. (laughs) +So, there you have it, pal! That's my expert advice on what to do in New York. Now, if you'll +excuse me, I've got some oil changes to attend to. (winks) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_conversations.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_conversations.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..21339116db17fe3f096b7b1a068678d1ef384eaa --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_conversations.txt_chunk_2.txt @@ -0,0 +1,21 @@ +You can continue the chat by appending your own response to it. The +response object returned by the pipeline actually contains the entire chat so far, so we can simply append +a message and pass it back: +python +chat = response[0]['generated_text'] +chat.append( + {"role": "user", "content": "Wait, what's so wild about soup cans?"} +) +response = pipe(chat, max_new_tokens=512) +print(response[0]['generated_text'][-1]['content']) +And you'll get: +```text +(laughs) Oh, you're killin' me, pal! You don't get it, do you? Warhol's soup cans are like, art, man! +It's like, he took something totally mundane, like a can of soup, and turned it into a masterpiece. It's +like, "Hey, look at me, I'm a can of soup, but I'm also a work of art!" 
+(sarcastically) Oh, yeah, real original, Andy. +But, you know, back in the '60s, it was like, a big deal. People were all about challenging the +status quo, and Warhol was like, the king of that. He took the ordinary and made it extraordinary. +And, let me tell you, it was like, a real game-changer. I mean, who would've thought that a can of soup could be art? (laughs) +But, hey, you're not alone, pal. I mean, I'm a robot, and even I don't get it. (winks) +But, hey, that's what makes art, art, right? (laughs) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_conversations.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_conversations.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..d123f516fb804bc17771ffbb9e665650a10ee5b6 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_conversations.txt_chunk_3.txt @@ -0,0 +1,64 @@ +The remainder of this tutorial will cover specific topics such +as performance and memory, or how to select a chat model for your needs. +Choosing a chat model +There are an enormous number of different chat models available on the Hugging Face Hub, +and new users often feel very overwhelmed by the selection offered. Don't be, though! You really need to just focus on +two important considerations: +- The model's size, which will determine if you can fit it in memory and how quickly it will +run. +- The quality of the model's chat output. +In general, these are correlated - bigger models tend to be +more capable, but even so there's a lot of variation at a given size point! +Size and model naming +The size of a model is easy to spot - it's the number in the model name, like "8B" or "70B". This is the number of +parameters in the model. Without quantization, you should expect to need about 2 bytes of memory per parameter. +This means that an "8B" model with 8 billion parameters will need about 16GB of memory just to fit the parameters, +plus a little extra for other overhead. It's a good fit for a high-end consumer GPU with 24GB of memory, such as a 3090 +or 4090. +Some chat models are "Mixture of Experts" models. These may list their sizes in different ways, such as "8x7B" or +"141B-A35B". The numbers are a little fuzzier here, but in general you can read this as saying that the model +has approximately 56 (8x7) billion parameters in the first case, or 141 billion parameters in the second case. +Note that it is very common to use quantization techniques to reduce the memory usage per parameter to 8 bits, 4 bits, +or even less. This topic is discussed in more detail in the Memory considerations section below. +But which chat model is best? +Even once you know the size of chat model you can run, there's still a lot of choice out there. One way to sift through +it all is to consult leaderboards. Two of the most popular leaderboards are the OpenLLM Leaderboard +and the LMSys Chatbot Arena Leaderboard. Note that the LMSys leaderboard +also includes proprietary models - look at the licence column to identify open-source ones that you can download, then +search for them on the Hugging Face Hub. +Specialist domains +Some models may be specialized for certain domains, such as medical or legal text, or non-English languages. +If you're working in these domains, you may find that a specialized model will give you big performance benefits. +Don't automatically assume that, though! 
Particularly when specialized models are smaller or older than the current +cutting-edge, a top-end general-purpose model may still outclass them. Thankfully, we are beginning to see +domain-specific leaderboards that should make it easier to locate +the best models for specialized domains. +What happens inside the pipeline? +The quickstart above used a high-level pipeline to chat with a chat model, which is convenient, but not the +most flexible. Let's take a more low-level approach, to see each of the steps involved in chat. Let's start with +a code sample, and then break it down: +thon +from transformers import AutoModelForCausalLM, AutoTokenizer +import torch +Prepare the input as before +chat = [ + {"role": "system", "content": "You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986."}, + {"role": "user", "content": "Hey, can you tell me any fun things to do in New York?"} +] +1: Load the model and tokenizer +model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto", torch_dtype=torch.bfloat16) +tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct") +2: Apply the chat template +formatted_chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True) +print("Formatted chat:\n", formatted_chat) +3: Tokenize the chat (This can be combined with the previous step using tokenize=True) +inputs = tokenizer(formatted_chat, return_tensors="pt", add_special_tokens=False) +Move the tokenized inputs to the same device the model is on (GPU/CPU) +inputs = {key: tensor.to(model.device) for key, tensor in inputs.items()} +print("Tokenized inputs:\n", inputs) +4: Generate text from the model +outputs = model.generate(**inputs, max_new_tokens=512, temperature=0.) +print("Generated tokens:\n", outputs) +5: Decode the output back to a string +decoded_output = tokenizer.decode(outputs[0][inputs['input_ids'].size(1):], skip_special_tokens=True) +print("Decoded output:\n", decoded_output) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_conversations.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_conversations.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..4d3fde932a5f64c46ebe333d8b9f87f56697181f --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_conversations.txt_chunk_4.txt @@ -0,0 +1,8 @@ +There's a lot in here, each piece of which could be its own document! Rather than going into too much detail, I'll cover +the broad ideas, and leave the details for the linked documents. The key steps are: + +Models and Tokenizers are loaded from the Hugging Face Hub. +The chat is formatted using the tokenizer's chat template +The formatted chat is tokenized using the tokenizer. +We generate a response from the model. +The tokens output by the model are decoded back to a string \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_conversations.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_conversations.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..4635b1088aa15f510019a75fab1956d61ee31e6a --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_conversations.txt_chunk_5.txt @@ -0,0 +1,18 @@ +Performance, memory and hardware +You probably know by now that most machine learning tasks are run on GPUs. However, it is entirely possible +to generate text from a chat model or language model on a CPU, albeit somewhat more slowly. 
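For instance, here is a minimal sketch of CPU-only generation with the same pipeline API shown earlier (the small Qwen/Qwen2-0.5B-Instruct checkpoint is only an illustrative choice to keep CPU latency tolerable; any compact chat model works):

from transformers import pipeline
# device="cpu" keeps the model and generation entirely on the CPU
pipe = pipeline("text-generation", "Qwen/Qwen2-0.5B-Instruct", device="cpu")
chat = [{"role": "user", "content": "Give me one sentence about New York."}]
print(pipe(chat, max_new_tokens=40)[0]["generated_text"][-1]["content"])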
If you can fit +the model in GPU memory, though, this will usually be the preferable option. +Memory considerations +By default, Hugging Face classes like [TextGenerationPipeline] or [AutoModelForCausalLM] will load the model in +float32 precision. This means that it will need 4 bytes (32 bits) per parameter, so an "8B" model with 8 billion +parameters will need ~32GB of memory. However, this can be wasteful! Most modern language models are trained in +"bfloat16" precision, which uses only 2 bytes per parameter. If your hardware supports it (Nvidia 30xx/Axxx +or newer), you can load the model in bfloat16 precision, using the torch_dtype argument as we did above. +It is possible to go even lower than 16-bits using "quantization", a method to lossily compress model weights. This +allows each parameter to be squeezed down to 8 bits, 4 bits or even less. Note that, especially at 4 bits, +the model's outputs may be negatively affected, but often this is a tradeoff worth making to fit a larger and more +capable chat model in memory. Let's see this in action with bitsandbytes: +thon +from transformers import AutoModelForCausalLM, BitsAndBytesConfig +quantization_config = BitsAndBytesConfig(load_in_8bit=True) # You can also try load_in_4bit +model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto", quantization_config=quantization_config) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_conversations.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_conversations.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..b99cc9303c82de1d2f9ee6913c338e5e14a7959d --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_conversations.txt_chunk_6.txt @@ -0,0 +1,11 @@ +Or we can do the same thing using the pipeline API: +thon +from transformers import pipeline, BitsAndBytesConfig +quantization_config = BitsAndBytesConfig(load_in_8bit=True) # You can also try load_in_4bit +pipe = pipeline("text-generation", "meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto", model_kwargs={"quantization_config": quantization_config}) + +There are several other options for quantizing models besides bitsandbytes - please see the Quantization guide +for more information. +Performance considerations + +For a more extensive guide on language model performance and optimization, check out LLM Inference Optimization . \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_conversations.txt_chunk_7.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_conversations.txt_chunk_7.txt new file mode 100644 index 0000000000000000000000000000000000000000..974896f97d50037e960a68637b881cd9ad2f2691 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_conversations.txt_chunk_7.txt @@ -0,0 +1,25 @@ +As a general rule, larger chat models will be slower in addition to requiring more memory. It's possible to be +more concrete about this, though: Generating text from a chat model is unusual in that it is bottlenecked by +memory bandwidth rather than compute power, because every active parameter must be read from memory for each +token that the model generates. This means that number of tokens per second you can generate from a chat +model is generally proportional to the total bandwidth of the memory it resides in, divided by the size of the model. +In our quickstart example above, our model was ~16GB in size when loaded in bfloat16 precision. 
+This means that 16GB must be read from memory for every token generated by the model. Total memory bandwidth can +vary from 20-100GB/sec for consumer CPUs to 200-900GB/sec for consumer GPUs, specialized CPUs like +Intel Xeon, AMD Threadripper/Epyc or high-end Apple silicon, and finally up to 2-3TB/sec for data center GPUs like +the Nvidia A100 or H100. This should give you a good idea of the generation speed you can expect from these different +hardware types. +Therefore, if you want to improve the speed of text generation, the easiest solution is to either reduce the +size of the model in memory (usually by quantization), or get hardware with higher memory bandwidth. For advanced users, +several other techniques exist to get around this bandwidth bottleneck. The most common are variants on +assisted generation, also known as "speculative +sampling". These techniques try to guess multiple future tokens at once, often using a smaller "draft model", and then +confirm these generations with the chat model. If the guesses are validated by the chat model, more than one token can +be generated per forward pass, which greatly alleviates the bandwidth bottleneck and improves generation speed. +Finally, we should also note the impact of "Mixture of Experts" (MoE) models here. Several popular chat models, +such as Mixtral, Qwen-MoE and DBRX, are MoE models. In these models, not every parameter is active for every token generated. +As a result, MoE models generally have much lower memory bandwidth requirements, even though their total size +can be quite large. They can therefore be several times faster than a normal "dense" model of the same size. However, +techniques like assisted generation are generally ineffective for these models because more parameters will become +active with each new speculated token, which will negate the bandwidth and speed benefits that the MoE architecture +provides. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..cae8b70fbfed3f34890c2648dc11eb8b7a6c6ffa --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_0.txt @@ -0,0 +1,9 @@ +Create a custom architecture +An AutoClass automatically infers the model architecture and downloads pretrained configuration and weights. Generally, we recommend using an AutoClass to produce checkpoint-agnostic code. But users who want more control over specific model parameters can create a custom 🤗 Transformers model from just a few base classes. This could be particularly useful for anyone who is interested in studying, training or experimenting with a 🤗 Transformers model. In this guide, dive deeper into creating a custom model without an AutoClass. Learn how to: + +Load and customize a model configuration. +Create a model architecture. +Create a slow and fast tokenizer for text. +Create an image processor for vision tasks. +Create a feature extractor for audio tasks. +Create a processor for multimodal tasks. 
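Returning to the generation-speed discussion above, here is a rough back-of-the-envelope sketch of the bandwidth-per-token estimate (illustrative numbers only, taken from the ranges quoted earlier):

# ~8B parameters in bfloat16 is about 16GB that must be streamed for every generated token
model_size_gb = 16
memory_bandwidth_gb_per_s = 800   # a consumer GPU somewhere in the 200-900 GB/sec range
print(memory_bandwidth_gb_per_s / model_size_gb)   # 50.0 -> roughly 50 tokens/sec as an upper bound

Real throughput will land below this ceiling once compute and other overhead are accounted for, but the ratio gives a useful first estimate.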
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..f6c14131f918a1730840f4deccc970127404ef12 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_1.txt @@ -0,0 +1,25 @@ +Configuration +A configuration refers to a model's specific attributes. Each model configuration has different attributes; for instance, all NLP models have the hidden_size, num_attention_heads, num_hidden_layers and vocab_size attributes in common. These attributes specify the number of attention heads or hidden layers to construct a model with. +Get a closer look at DistilBERT by accessing [DistilBertConfig] to inspect it's attributes: + +from transformers import DistilBertConfig +config = DistilBertConfig() +print(config) +DistilBertConfig { + "activation": "gelu", + "attention_dropout": 0.1, + "dim": 768, + "dropout": 0.1, + "hidden_dim": 3072, + "initializer_range": 0.02, + "max_position_embeddings": 512, + "model_type": "distilbert", + "n_heads": 12, + "n_layers": 6, + "pad_token_id": 0, + "qa_dropout": 0.1, + "seq_classif_dropout": 0.2, + "sinusoidal_pos_embds": false, + "transformers_version": "4.16.2", + "vocab_size": 30522 +} \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_10.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_10.txt new file mode 100644 index 0000000000000000000000000000000000000000..3c1691017258733e72abd8b57f337b1b7627028a --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_10.txt @@ -0,0 +1,11 @@ +[PreTrainedTokenizer]: a Python implementation of a tokenizer. +[PreTrainedTokenizerFast]: a tokenizer from our Rust-based 🤗 Tokenizer library. This tokenizer type is significantly faster - especially during batch tokenization - due to its Rust implementation. The fast tokenizer also offers additional methods like offset mapping which maps tokens to their original words or characters. + +Both tokenizers support common methods such as encoding and decoding, adding new tokens, and managing special tokens. + +Not every model supports a fast tokenizer. Take a look at this table to check if a model has fast tokenizer support. + +If you trained your own tokenizer, you can create one from your vocabulary file: + +from transformers import DistilBertTokenizer +my_tokenizer = DistilBertTokenizer(vocab_file="my_vocab_file.txt", do_lower_case=False, padding_side="left") \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_11.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_11.txt new file mode 100644 index 0000000000000000000000000000000000000000..1d3144e82564d099705d4f77bddb7a5cb7ba4543 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_11.txt @@ -0,0 +1,11 @@ +It is important to remember the vocabulary from a custom tokenizer will be different from the vocabulary generated by a pretrained model's tokenizer. You need to use a pretrained model's vocabulary if you are using a pretrained model, otherwise the inputs won't make sense. 
Create a tokenizer with a pretrained model's vocabulary with the [DistilBertTokenizer] class: + +from transformers import DistilBertTokenizer +slow_tokenizer = DistilBertTokenizer.from_pretrained("distilbert/distilbert-base-uncased") + +Create a fast tokenizer with the [DistilBertTokenizerFast] class: + +from transformers import DistilBertTokenizerFast +fast_tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert/distilbert-base-uncased") + +By default, [AutoTokenizer] will try to load a fast tokenizer. You can disable this behavior by setting use_fast=False in from_pretrained. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_12.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_12.txt new file mode 100644 index 0000000000000000000000000000000000000000..dc759a59974d89e3c301dcd95eb82d923b3d5ee3 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_12.txt @@ -0,0 +1,28 @@ +Image processor +An image processor processes vision inputs. It inherits from the base [~image_processing_utils.ImageProcessingMixin] class. +To use, create an image processor associated with the model you're using. For example, create a default [ViTImageProcessor] if you are using ViT for image classification: + +from transformers import ViTImageProcessor +vit_extractor = ViTImageProcessor() +print(vit_extractor) +ViTImageProcessor { + "do_normalize": true, + "do_resize": true, + "image_processor_type": "ViTImageProcessor", + "image_mean": [ + 0.5, + 0.5, + 0.5 + ], + "image_std": [ + 0.5, + 0.5, + 0.5 + ], + "resample": 2, + "size": 224 +} + +If you aren't looking for any customization, just use the from_pretrained method to load a model's default image processor parameters. + +Modify any of the [ViTImageProcessor] parameters to create your custom image processor: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_13.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_13.txt new file mode 100644 index 0000000000000000000000000000000000000000..cdd6f6f300f9f409826550e9d611f25da36b7daa --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_13.txt @@ -0,0 +1,24 @@ +Modify any of the [ViTImageProcessor] parameters to create your custom image processor: + +from transformers import ViTImageProcessor +my_vit_extractor = ViTImageProcessor(resample="PIL.Image.BOX", do_normalize=False, image_mean=[0.3, 0.3, 0.3]) +print(my_vit_extractor) +ViTImageProcessor { + "do_normalize": false, + "do_resize": true, + "image_processor_type": "ViTImageProcessor", + "image_mean": [ + 0.3, + 0.3, + 0.3 + ], + "image_std": [ + 0.5, + 0.5, + 0.5 + ], + "resample": "PIL.Image.BOX", + "size": 224 +} + +Backbone \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_14.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_14.txt new file mode 100644 index 0000000000000000000000000000000000000000..c9601ff231dd5483f36427bcef7d01347f5d0fef --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_14.txt @@ -0,0 +1,12 @@ +Backbone + +Computer vision models consist of a backbone, neck, and head. The backbone extracts features from an input image, the neck combines and enhances the extracted features, and the head is used for the main task (e.g., object detection). 
Start by initializing a backbone in the model config and specify whether you want to load pretrained weights or load randomly initialized weights. Then you can pass the model config to the model head. +For example, to load a ResNet backbone into a MaskFormer model with an instance segmentation head: + +Set use_pretrained_backbone=True to load pretrained ResNet weights for the backbone. + +from transformers import MaskFormerConfig, MaskFormerForInstanceSegmentation +config = MaskFormerConfig(backbone="microsoft/resnet-50", use_pretrained_backbone=True) # backbone and neck config +model = MaskFormerForInstanceSegmentation(config) # head + +Set use_pretrained_backbone=False to randomly initialize a ResNet backbone. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_15.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_15.txt new file mode 100644 index 0000000000000000000000000000000000000000..0d0ab620feaa24e813efb007e86bcc8c4a34fbeb --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_15.txt @@ -0,0 +1,12 @@ +Set use_pretrained_backbone=False to randomly initialize a ResNet backbone. + +from transformers import MaskFormerConfig, MaskFormerForInstanceSegmentation +config = MaskFormerConfig(backbone="microsoft/resnet-50", use_pretrained_backbone=False) # backbone and neck config +model = MaskFormerForInstanceSegmentation(config) # head + +You could also load the backbone config separately and then pass it to the model config. + +from transformers import MaskFormerConfig, MaskFormerForInstanceSegmentation, ResNetConfig +backbone_config = ResNetConfig() +config = MaskFormerConfig(backbone_config=backbone_config) +model = MaskFormerForInstanceSegmentation(config) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_16.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_16.txt new file mode 100644 index 0000000000000000000000000000000000000000..f0f85aed78ad6a78dd8e29026cf165f046232373 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_16.txt @@ -0,0 +1,12 @@ +timm models are loaded within a model with use_timm_backbone=True or with [TimmBackbone] and [TimmBackboneConfig]. +Use use_timm_backbone=True and use_pretrained_backbone=True to load pretrained timm weights for the backbone. +thon +from transformers import MaskFormerConfig, MaskFormerForInstanceSegmentation +config = MaskFormerConfig(backbone="resnet50", use_pretrained_backbone=True, use_timm_backbone=True) # backbone and neck config +model = MaskFormerForInstanceSegmentation(config) # head + +Set use_timm_backbone=True and use_pretrained_backbone=False to load a randomly initialized timm backbone. 
+thon +from transformers import MaskFormerConfig, MaskFormerForInstanceSegmentation +config = MaskFormerConfig(backbone="resnet50", use_pretrained_backbone=False, use_timm_backbone=True) # backbone and neck config +model = MaskFormerForInstanceSegmentation(config) # head \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_17.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_17.txt new file mode 100644 index 0000000000000000000000000000000000000000..c670f116f6ff46a9e4e27030b18f3a9d87eb5a1a --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_17.txt @@ -0,0 +1,10 @@ +You could also load the backbone config and use it to create a TimmBackbone or pass it to the model config. Timm backbones will load pretrained weights by default. Set use_pretrained_backbone=False to load randomly initialized weights. +thon +from transformers import TimmBackboneConfig, TimmBackbone +backbone_config = TimmBackboneConfig("resnet50", use_pretrained_backbone=False) +Create a backbone class +backbone = TimmBackbone(config=backbone_config) +Create a model with a timm backbone +from transformers import MaskFormerConfig, MaskFormerForInstanceSegmentation +config = MaskFormerConfig(backbone_config=backbone_config) +model = MaskFormerForInstanceSegmentation(config) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_18.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_18.txt new file mode 100644 index 0000000000000000000000000000000000000000..ae20232a549cfbdcc4abd23bcaf405994cbecc03 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_18.txt @@ -0,0 +1,18 @@ +Feature extractor +A feature extractor processes audio inputs. It inherits from the base [~feature_extraction_utils.FeatureExtractionMixin] class, and may also inherit from the [SequenceFeatureExtractor] class for processing audio inputs. +To use, create a feature extractor associated with the model you're using. For example, create a default [Wav2Vec2FeatureExtractor] if you are using Wav2Vec2 for audio classification: + +from transformers import Wav2Vec2FeatureExtractor +w2v2_extractor = Wav2Vec2FeatureExtractor() +print(w2v2_extractor) +Wav2Vec2FeatureExtractor { + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": false, + "sampling_rate": 16000 +} + +If you aren't looking for any customization, just use the from_pretrained method to load a model's default feature extractor parameters. 
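For example, a minimal sketch of doing exactly that (the checkpoint name here is only an illustration):

from transformers import Wav2Vec2FeatureExtractor
# Loads the feature extractor settings stored alongside the pretrained checkpoint
w2v2_extractor = Wav2Vec2FeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")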
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_19.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_19.txt new file mode 100644 index 0000000000000000000000000000000000000000..a2fe6d44f1b761361c94e43a0d5cd612c8fa3759 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_19.txt @@ -0,0 +1,18 @@ +Modify any of the [Wav2Vec2FeatureExtractor] parameters to create your custom feature extractor: + +from transformers import Wav2Vec2FeatureExtractor +w2v2_extractor = Wav2Vec2FeatureExtractor(sampling_rate=8000, do_normalize=False) +print(w2v2_extractor) +Wav2Vec2FeatureExtractor { + "do_normalize": false, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": false, + "sampling_rate": 8000 +} + +Processor +For models that support multimodal tasks, 🤗 Transformers offers a processor class that conveniently wraps processing classes such as a feature extractor and a tokenizer into a single object. For example, let's use the [Wav2Vec2Processor] for an automatic speech recognition task (ASR). ASR transcribes audio to text, so you will need a feature extractor and a tokenizer. +Create a feature extractor to handle the audio inputs: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..c0abafce7a0ec8050d94c9be1e364b2fa5ceaf2a --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_2.txt @@ -0,0 +1,27 @@ +[DistilBertConfig] displays all the default attributes used to build a base [DistilBertModel]. All attributes are customizable, creating space for experimentation. For example, you can customize a default model to: + +Try a different activation function with the activation parameter. +Use a higher dropout ratio for the attention probabilities with the attention_dropout parameter. 
+ +my_config = DistilBertConfig(activation="relu", attention_dropout=0.4) +print(my_config) +DistilBertConfig { + "activation": "relu", + "attention_dropout": 0.4, + "dim": 768, + "dropout": 0.1, + "hidden_dim": 3072, + "initializer_range": 0.02, + "max_position_embeddings": 512, + "model_type": "distilbert", + "n_heads": 12, + "n_layers": 6, + "pad_token_id": 0, + "qa_dropout": 0.1, + "seq_classif_dropout": 0.2, + "sinusoidal_pos_embds": false, + "transformers_version": "4.16.2", + "vocab_size": 30522 +} + +Pretrained model attributes can be modified in the [~PretrainedConfig.from_pretrained] function: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_20.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_20.txt new file mode 100644 index 0000000000000000000000000000000000000000..5233a6f81f2493f0f5a7a2ccf7f250ad62fdf310 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_20.txt @@ -0,0 +1,14 @@ +from transformers import Wav2Vec2FeatureExtractor +feature_extractor = Wav2Vec2FeatureExtractor(padding_value=1.0, do_normalize=True) + +Create a tokenizer to handle the text inputs: + +from transformers import Wav2Vec2CTCTokenizer +tokenizer = Wav2Vec2CTCTokenizer(vocab_file="my_vocab_file.txt") + +Combine the feature extractor and tokenizer in [Wav2Vec2Processor]: + +from transformers import Wav2Vec2Processor +processor = Wav2Vec2Processor(feature_extractor=feature_extractor, tokenizer=tokenizer) + +With two basic classes - configuration and model - and an additional preprocessing class (tokenizer, image processor, feature extractor, or processor), you can create any of the models supported by 🤗 Transformers. Each of these base classes are configurable, allowing you to use the specific attributes you want. You can easily setup a model for training or modify an existing pretrained model to fine-tune. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..e543edef29ba5ed33f318a3778bb5b4c75c50c37 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_3.txt @@ -0,0 +1,13 @@ +Pretrained model attributes can be modified in the [~PretrainedConfig.from_pretrained] function: + +my_config = DistilBertConfig.from_pretrained("distilbert/distilbert-base-uncased", activation="relu", attention_dropout=0.4) + +Once you are satisfied with your model configuration, you can save it with [~PretrainedConfig.save_pretrained]. Your configuration file is stored as a JSON file in the specified save directory: + +my_config.save_pretrained(save_directory="./your_model_save_path") + +To reuse the configuration file, load it with [~PretrainedConfig.from_pretrained]: + +my_config = DistilBertConfig.from_pretrained("./your_model_save_path/config.json") + +You can also save your configuration file as a dictionary or even just the difference between your custom configuration attributes and the default configuration attributes! See the configuration documentation for more details. 
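As a quick sketch of those two options, the configuration object exposes both forms as plain dictionaries (reusing the my_config object from the example above):

config_dict = my_config.to_dict()        # every attribute, defaults included
config_diff = my_config.to_diff_dict()   # only the attributes that differ from the defaults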
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..1c88991935f673cbbca2044a4bb67b754d9f2a8d --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_4.txt @@ -0,0 +1,8 @@ +Model +The next step is to create a model. The model - also loosely referred to as the architecture - defines what each layer is doing and what operations are happening. Attributes like num_hidden_layers from the configuration are used to define the architecture. Every model shares the base class [PreTrainedModel] and a few common methods like resizing input embeddings and pruning self-attention heads. In addition, all models are also either a torch.nn.Module, tf.keras.Model or flax.linen.Module subclass. This means models are compatible with each of their respective framework's usage. + +Load your custom configuration attributes into the model: + +from transformers import DistilBertModel +my_config = DistilBertConfig.from_pretrained("./your_model_save_path/config.json") +model = DistilBertModel(my_config) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..d8f7ee9537a05a2ca0a1fd613b2c2cc0e287e1da --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_5.txt @@ -0,0 +1,10 @@ +This creates a model with random values instead of pretrained weights. You won't be able to use this model for anything useful yet until you train it. Training is a costly and time-consuming process. It is generally better to use a pretrained model to obtain better results faster, while using only a fraction of the resources required for training. +Create a pretrained model with [~PreTrainedModel.from_pretrained]: + +model = DistilBertModel.from_pretrained("distilbert/distilbert-base-uncased") + +When you load pretrained weights, the default model configuration is automatically loaded if the model is provided by 🤗 Transformers. However, you can still replace - some or all of - the default model configuration attributes with your own if you'd like: + +model = DistilBertModel.from_pretrained("distilbert/distilbert-base-uncased", config=my_config) + +Load your custom configuration attributes into the model: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..252d5e165bfdafc5c881668a8449e2529be51261 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_6.txt @@ -0,0 +1,12 @@ +Load your custom configuration attributes into the model: + +from transformers import TFDistilBertModel +my_config = DistilBertConfig.from_pretrained("./your_model_save_path/my_config.json") +tf_model = TFDistilBertModel(my_config) + +This creates a model with random values instead of pretrained weights. You won't be able to use this model for anything useful yet until you train it. Training is a costly and time-consuming process. It is generally better to use a pretrained model to obtain better results faster, while using only a fraction of the resources required for training. 
+Create a pretrained model with [~TFPreTrainedModel.from_pretrained]: + +tf_model = TFDistilBertModel.from_pretrained("distilbert/distilbert-base-uncased") + +When you load pretrained weights, the default model configuration is automatically loaded if the model is provided by 🤗 Transformers. However, you can still replace - some or all of - the default model configuration attributes with your own if you'd like: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_7.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_7.txt new file mode 100644 index 0000000000000000000000000000000000000000..b121564b63f1ae915d56012408a188a7f158edec --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_7.txt @@ -0,0 +1,9 @@ +tf_model = TFDistilBertModel.from_pretrained("distilbert/distilbert-base-uncased", config=my_config) + +Model heads +At this point, you have a base DistilBERT model which outputs the hidden states. The hidden states are passed as inputs to a model head to produce the final output. 🤗 Transformers provides a different model head for each task as long as a model supports the task (i.e., you can't use DistilBERT for a sequence-to-sequence task like translation). + +For example, [DistilBertForSequenceClassification] is a base DistilBERT model with a sequence classification head. The sequence classification head is a linear layer on top of the pooled outputs. + +from transformers import DistilBertForSequenceClassification +model = DistilBertForSequenceClassification.from_pretrained("distilbert/distilbert-base-uncased") \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_8.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_8.txt new file mode 100644 index 0000000000000000000000000000000000000000..1593dec3ebb75e9bc47f340beff5a4e3235d370d --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_8.txt @@ -0,0 +1,11 @@ +Easily reuse this checkpoint for another task by switching to a different model head. For a question answering task, you would use the [DistilBertForQuestionAnswering] model head. The question answering head is similar to the sequence classification head except it is a linear layer on top of the hidden states output. + +from transformers import DistilBertForQuestionAnswering +model = DistilBertForQuestionAnswering.from_pretrained("distilbert/distilbert-base-uncased") + + + +For example, [TFDistilBertForSequenceClassification] is a base DistilBERT model with a sequence classification head. The sequence classification head is a linear layer on top of the pooled outputs. + +from transformers import TFDistilBertForSequenceClassification +tf_model = TFDistilBertForSequenceClassification.from_pretrained("distilbert/distilbert-base-uncased") \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_9.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_9.txt new file mode 100644 index 0000000000000000000000000000000000000000..6e445c7ab23e36c1382eac25454aa3235b5fb03a --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_create_a_model.txt_chunk_9.txt @@ -0,0 +1,7 @@ +Easily reuse this checkpoint for another task by switching to a different model head. For a question answering task, you would use the [TFDistilBertForQuestionAnswering] model head.
The question answering head is similar to the sequence classification head except it is a linear layer on top of the hidden states output. + +from transformers import TFDistilBertForQuestionAnswering +tf_model = TFDistilBertForQuestionAnswering.from_pretrained("distilbert/distilbert-base-uncased") + +Tokenizer +The last base class you need before using a model for textual data is a tokenizer to convert raw text to tensors. There are two types of tokenizers you can use with 🤗 Transformers: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..831eedac8027368624f10d54e96d241c21f083f6 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_0.txt @@ -0,0 +1,14 @@ +Building custom models +The 🤗 Transformers library is designed to be easily extensible. Every model is fully coded in a given subfolder +of the repository with no abstraction, so you can easily copy a modeling file and tweak it to your needs. +If you are writing a brand new model, it might be easier to start from scratch. In this tutorial, we will show you +how to write a custom model and its configuration so it can be used inside Transformers, and how you can share it +with the community (with the code it relies on) so that anyone can use it, even if it's not present in the 🤗 +Transformers library. We'll see how to build upon transformers and extend the framework with your hooks and +custom code. +We will illustrate all of this on a ResNet model, by wrapping the ResNet class of the +timm library into a [PreTrainedModel]. +Writing a custom configuration +Before we dive into the model, let's first write its configuration. The configuration of a model is an object that +will contain all the necessary information to build the model. As we will see in the next section, the model can only +take a config to be initialized, so we really need that object to be as complete as possible. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..b776232801aba00ce2d819f6fc333bdfb2ba6421 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_1.txt @@ -0,0 +1,5 @@ +Models in the transformers library itself generally follow the convention that they accept a config object +in their __init__ method, and then pass the whole config to sub-layers in the model, rather than breaking the +config object into multiple arguments that are all passed individually to sub-layers. Writing your model in this +style results in simpler code with a clear "source of truth" for any hyperparameters, and also makes it easier +to reuse code from other models in transformers. 
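To make the convention described above concrete, here is a small hypothetical sketch (all class and attribute names below are invented for illustration and are not transformers classes): the block receives the whole config object and hands it to its sub-layers, and each sub-layer reads only the fields it needs.

```python
import torch
import torch.nn as nn

class DemoConfig:
    # Hypothetical stand-in for a PretrainedConfig subclass.
    hidden_size = 64
    dropout = 0.1

class DemoAttention(nn.Module):
    def __init__(self, config):
        super().__init__()
        # The sub-layer reads only the fields it needs from the shared config.
        self.proj = nn.Linear(config.hidden_size, config.hidden_size)

    def forward(self, hidden_states):
        return self.proj(hidden_states)

class DemoBlock(nn.Module):
    def __init__(self, config):
        super().__init__()
        # Pass the whole config down instead of hidden_size=..., dropout=... arguments.
        self.attention = DemoAttention(config)
        self.dropout = nn.Dropout(config.dropout)

    def forward(self, hidden_states):
        return self.dropout(self.attention(hidden_states))

block = DemoBlock(DemoConfig())
print(block(torch.randn(2, 4, 64)).shape)  # torch.Size([2, 4, 64])
```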
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_10.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_10.txt new file mode 100644 index 0000000000000000000000000000000000000000..501d503385011cbb4bdc6bdffa9f1331792d8948 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_10.txt @@ -0,0 +1,11 @@ +from transformers import AutoConfig, AutoModel, AutoModelForImageClassification +AutoConfig.register("resnet", ResnetConfig) +AutoModel.register(ResnetConfig, ResnetModel) +AutoModelForImageClassification.register(ResnetConfig, ResnetModelForImageClassification) + +Note that the first argument used when registering your custom config to [AutoConfig] needs to match the model_type +of your custom config, and the first argument used when registering your custom models to any auto model class needs +to match the config_class of those models. +Sending the code to the Hub + +This API is experimental and may have some slight breaking changes in the next releases. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_11.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_11.txt new file mode 100644 index 0000000000000000000000000000000000000000..1c3ea45335cc96d6367166f8825d3b1d035d2385 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_11.txt @@ -0,0 +1,16 @@ +This API is experimental and may have some slight breaking changes in the next releases. + +First, make sure your model is fully defined in a .py file. It can rely on relative imports to some other files as +long as all the files are in the same directory (we don't support submodules for this feature yet). For our example, +we'll define a modeling_resnet.py file and a configuration_resnet.py file in a folder of the current working +directory named resnet_model. The configuration file contains the code for ResnetConfig and the modeling file +contains the code of ResnetModel and ResnetModelForImageClassification. +. +└── resnet_model + ├── __init__.py + ├── configuration_resnet.py + └── modeling_resnet.py +The __init__.py can be empty, it's just there so that Python detects resnet_model can be use as a module. + +If copying a modeling files from the library, you will need to replace all the relative imports at the top of the file +to import from the transformers package. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_12.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_12.txt new file mode 100644 index 0000000000000000000000000000000000000000..6ac9f63f61e82556595f8def0284df7c6a9900eb --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_12.txt @@ -0,0 +1,15 @@ +Note that you can re-use (or subclass) an existing configuration/model. 
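As a hedged illustration of the note above about re-using an existing configuration, a custom model can simply point its config_class at a stock configuration such as BertConfig instead of defining a new one. The model below is invented for illustration only and is not part of this tutorial's ResNet example.

```python
import torch
import torch.nn as nn
from transformers import BertConfig, PreTrainedModel

class TinyBertLikeModel(PreTrainedModel):
    # Re-use the stock BertConfig instead of writing a new configuration class.
    config_class = BertConfig

    def __init__(self, config):
        super().__init__(config)
        self.embed = nn.Embedding(config.vocab_size, config.hidden_size)

    def forward(self, input_ids):
        return self.embed(input_ids)

model = TinyBertLikeModel(BertConfig(hidden_size=128, num_hidden_layers=2))
print(model(torch.tensor([[1, 2, 3]])).shape)  # torch.Size([1, 3, 128])
```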
+To share your model with the community, follow those steps: first import the ResNet model and config from the newly +created files: +py +from resnet_model.configuration_resnet import ResnetConfig +from resnet_model.modeling_resnet import ResnetModel, ResnetModelForImageClassification +Then you have to tell the library you want to copy the code files of those objects when using the save_pretrained +method and properly register them with a given Auto class (especially for models), just run: +py +ResnetConfig.register_for_auto_class() +ResnetModel.register_for_auto_class("AutoModel") +ResnetModelForImageClassification.register_for_auto_class("AutoModelForImageClassification") +Note that there is no need to specify an auto class for the configuration (there is only one auto class for them, +[AutoConfig]) but it's different for models. Your custom model could be suitable for many different tasks, so you +have to specify which one of the auto classes is the correct one for your model. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_13.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_13.txt new file mode 100644 index 0000000000000000000000000000000000000000..a26636982c63c63fe511a117865fbb132a6b8511 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_13.txt @@ -0,0 +1,21 @@ +Use register_for_auto_class() if you want the code files to be copied. If you instead prefer to use code on the Hub from another repo, +you don't need to call it. In cases where there's more than one auto class, you can modify the config.json directly using the +following structure: +json +"auto_map": { + "AutoConfig": "--", + "AutoModel": "--", + "AutoModelFor": "--", +}, + +Next, let's create the config and models as we did before: + +resnet50d_config = ResnetConfig(block_type="bottleneck", stem_width=32, stem_type="deep", avg_down=True) +resnet50d = ResnetModelForImageClassification(resnet50d_config) +pretrained_model = timm.create_model("resnet50d", pretrained=True) +resnet50d.model.load_state_dict(pretrained_model.state_dict()) + +Now to send the model to the Hub, make sure you are logged in. 
Either run in your terminal: + +huggingface-cli login +or from a notebook: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_14.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_14.txt new file mode 100644 index 0000000000000000000000000000000000000000..db8fa64770a0e53f9c3eca110da0da3e94ef64f9 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_14.txt @@ -0,0 +1,5 @@ +huggingface-cli login +or from a notebook: + +from huggingface_hub import notebook_login +notebook_login() \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_15.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_15.txt new file mode 100644 index 0000000000000000000000000000000000000000..841a628e9430db2ab276243cff7b97e7d091f400 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_15.txt @@ -0,0 +1,15 @@ +from huggingface_hub import notebook_login +notebook_login() + +You can then push to your own namespace (or an organization you are a member of) like this: +py +resnet50d.push_to_hub("custom-resnet50d") +On top of the modeling weights and the configuration in json format, this also copied the modeling and +configuration .py files in the folder custom-resnet50d and uploaded the result to the Hub. You can check the result +in this model repo. +See the sharing tutorial for more information on the push to Hub method. +Using a model with custom code +You can use any configuration, model or tokenizer with custom code files in its repository with the auto-classes and +the from_pretrained method. All files and code uploaded to the Hub are scanned for malware (refer to the Hub security documentation for more information), but you should still +review the model code and author to avoid executing malicious code on your machine. Set trust_remote_code=True to use +a model with custom code: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_16.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_16.txt new file mode 100644 index 0000000000000000000000000000000000000000..a439e9fc3e5e14e82b02150f05e725a1f6c9d13b --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_16.txt @@ -0,0 +1,12 @@ +from transformers import AutoModelForImageClassification +model = AutoModelForImageClassification.from_pretrained("sgugger/custom-resnet50d", trust_remote_code=True) + +It is also strongly encouraged to pass a commit hash as a revision to make sure the author of the models did not +update the code with some malicious new lines (unless you fully trust the authors of the models). +py +commit_hash = "ed94a7c6247d8aedce4647f00f20de6875b5b292" +model = AutoModelForImageClassification.from_pretrained( + "sgugger/custom-resnet50d", trust_remote_code=True, revision=commit_hash +) +Note that when browsing the commit history of the model repo on the Hub, there is a button to easily copy the commit +hash of any commit. 
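The same pattern applies to configurations (and tokenizers) with custom code. As a hedged sketch, the snippet below loads just the configuration of the example repository above through AutoConfig, pinning the same example commit hash; the printed class name is an assumption based on the ResnetConfig described in this tutorial.

```python
from transformers import AutoConfig

# Example repository and commit hash from the text above; the hash pins the reviewed code version.
commit_hash = "ed94a7c6247d8aedce4647f00f20de6875b5b292"
config = AutoConfig.from_pretrained(
    "sgugger/custom-resnet50d",
    trust_remote_code=True,  # allow loading the configuration class shipped with the repo
    revision=commit_hash,
)
print(type(config).__name__)  # expected: ResnetConfig
```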
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..65df3346bf8123d6c593a6dac68d2c42ab01e480 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_2.txt @@ -0,0 +1,25 @@ +In our example, we will take a couple of arguments of the ResNet class that we might want to tweak. Different +configurations will then give us the different types of ResNets that are possible. We then just store those arguments, +after checking the validity of a few of them. +python +from transformers import PretrainedConfig +from typing import List +class ResnetConfig(PretrainedConfig): + model_type = "resnet" +def __init__( + self, + block_type="bottleneck", + layers: List[int] = [3, 4, 6, 3], + num_classes: int = 1000, + input_channels: int = 3, + cardinality: int = 1, + base_width: int = 64, + stem_width: int = 64, + stem_type: str = "", + avg_down: bool = False, + **kwargs, +): + if block_type not in ["basic", "bottleneck"]: + raise ValueError(f"`block_type` must be 'basic' or 'bottleneck', got {block_type}.") + if stem_type not in ["", "deep", "deep-tiered"]: + raise ValueError(f"`stem_type` must be '', 'deep' or 'deep-tiered', got {stem_type}.") \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..6908ae0363546f440a9901621b61c15b94da4aed --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_3.txt @@ -0,0 +1,10 @@ +self.block_type = block_type + self.layers = layers + self.num_classes = num_classes + self.input_channels = input_channels + self.cardinality = cardinality + self.base_width = base_width + self.stem_width = stem_width + self.stem_type = stem_type + self.avg_down = avg_down + super().__init__(**kwargs) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..2f17750079b8cf1314f858ae4c63a9943babc27f --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_4.txt @@ -0,0 +1,28 @@ +The three important things to remember when writing your own configuration are the following: +- you have to inherit from PretrainedConfig, +- the __init__ of your PretrainedConfig must accept any kwargs, +- those kwargs need to be passed to the superclass __init__. +The inheritance is to make sure you get all the functionality from the 🤗 Transformers library, while the two other +constraints come from the fact that a PretrainedConfig has more fields than the ones you are setting. When reloading a +config with the from_pretrained method, those fields need to be accepted by your config and then sent to the +superclass. +Defining a model_type for your configuration (here model_type="resnet") is not mandatory, unless you want to +register your model with the auto classes (see last section). +With this done, you can easily create and save your configuration like you would do with any other model config of the +library.
Here is how we can create a resnet50d config and save it: +py +resnet50d_config = ResnetConfig(block_type="bottleneck", stem_width=32, stem_type="deep", avg_down=True) +resnet50d_config.save_pretrained("custom-resnet") +This will save a file named config.json inside the folder custom-resnet. You can then reload your config with the +from_pretrained method: +py +resnet50d_config = ResnetConfig.from_pretrained("custom-resnet") +You can also use any other method of the [PretrainedConfig] class, like [~PretrainedConfig.push_to_hub] to +directly upload your config to the Hub. +Writing a custom model +Now that we have our ResNet configuration, we can go on writing the model. We will actually write two: one that +extracts the hidden features from a batch of images (like [BertModel]) and one that is suitable for image +classification (like [BertForSequenceClassification]). +As we mentioned before, we'll only write a loose wrapper of the model to keep it simple for this example. The only +thing we need to do before writing this class is to define a mapping between the block types and the actual block classes. Then the +model is defined from the configuration by passing everything to the ResNet class: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..d74411a3507d51d68136dac12d8f4190cb7cb45e --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_5.txt @@ -0,0 +1,25 @@ +from transformers import PreTrainedModel +from timm.models.resnet import BasicBlock, Bottleneck, ResNet +from .configuration_resnet import ResnetConfig +BLOCK_MAPPING = {"basic": BasicBlock, "bottleneck": Bottleneck} +class ResnetModel(PreTrainedModel): + config_class = ResnetConfig +def __init__(self, config): + super().__init__(config) + block_layer = BLOCK_MAPPING[config.block_type] + self.model = ResNet( + block_layer, + config.layers, + num_classes=config.num_classes, + in_chans=config.input_channels, + cardinality=config.cardinality, + base_width=config.base_width, + stem_width=config.stem_width, + stem_type=config.stem_type, + avg_down=config.avg_down, + ) + +def forward(self, tensor): + return self.model.forward_features(tensor) + +For the model that will classify images, we just change the forward method: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..d75ed4ab6edabc313b4ad1279a307e7d00769154 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_6.txt @@ -0,0 +1,26 @@ +For the model that will classify images, we just change the forward method: + +import torch +class ResnetModelForImageClassification(PreTrainedModel): + config_class = ResnetConfig +def __init__(self, config): + super().__init__(config) + block_layer = BLOCK_MAPPING[config.block_type] + self.model = ResNet( + block_layer, + config.layers, + num_classes=config.num_classes, + in_chans=config.input_channels, + cardinality=config.cardinality, + base_width=config.base_width, + stem_width=config.stem_width, + stem_type=config.stem_type, + avg_down=config.avg_down, + ) + +def forward(self, tensor, labels=None): + logits = self.model(tensor) + if labels is not None: + loss = torch.nn.functional.cross_entropy(logits, labels) + return {"loss": loss, "logits": 
logits} + return {"logits": logits} \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_7.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_7.txt new file mode 100644 index 0000000000000000000000000000000000000000..300c5fd176a249260465121c8afbc6f2d67586b1 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_7.txt @@ -0,0 +1,5 @@ +In both cases, notice how we inherit from PreTrainedModel and call the superclass initialization with the config +(a bit like when you write a regular torch.nn.Module). The line that sets the config_class is not mandatory, unless +you want to register your model with the auto classes (see last section). + +If your model is very similar to a model inside the library, you can re-use the same configuration as this model. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_8.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_8.txt new file mode 100644 index 0000000000000000000000000000000000000000..163b1a92810fba3cb0cee9cdb3516059af095ca0 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_8.txt @@ -0,0 +1,13 @@ +You can have your model return anything you want, but returning a dictionary like we did for +ResnetModelForImageClassification, with the loss included when labels are passed, will make your model directly +usable inside the [Trainer] class. Using another output format is fine as long as you are planning on using your own +training loop or another library for training. +Now that we have our model class, let's create one: +py +resnet50d = ResnetModelForImageClassification(resnet50d_config) +Again, you can use any of the methods of [PreTrainedModel], like [~PreTrainedModel.save_pretrained] or +[~PreTrainedModel.push_to_hub]. We will use the second in the next section, and see how to push the model weights +with the code of our model. But first, let's load some pretrained weights inside our model. +In your own use case, you will probably be training your custom model on your own data. To go fast for this tutorial, +we will use the pretrained version of the resnet50d. Since our model is just a wrapper around it, it's going to be +easy to transfer those weights: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_9.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_9.txt new file mode 100644 index 0000000000000000000000000000000000000000..b83c5e83bc946366994a1efceac2d92ab58653ac --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_custom_models.txt_chunk_9.txt @@ -0,0 +1,12 @@ +import timm +pretrained_model = timm.create_model("resnet50d", pretrained=True) +resnet50d.model.load_state_dict(pretrained_model.state_dict()) + +Now let's see how to make sure that when we do [~PreTrainedModel.save_pretrained] or [~PreTrainedModel.push_to_hub], the +code of the model is saved. +Registering a model with custom code to the auto classes +If you are writing a library that extends 🤗 Transformers, you may want to extend the auto classes to include your own +model. This is different from pushing the code to the Hub in the sense that users will need to import your library to +get the custom models (contrarily to automatically downloading the model code from the Hub). 
+As long as your config has a model_type attribute that is different from existing model types, and that your model +classes have the right config_class attributes, you can just add them to the auto classes like this: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..088a57f1f260671fffa9d826149d0b9b1e40ade6 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_0.txt @@ -0,0 +1,9 @@ +Debugging +Training on multiple GPUs can be a tricky endeavor whether you're running into installation issues or communication problems between your GPUs. This debugging guide covers some issues you may run into and how to resolve them. +DeepSpeed CUDA installation +If you're using DeepSpeed, you've probably already installed it with the following command. + +pip install deepspeed +DeepSpeed compiles CUDA C++ code and it can be a potential source of errors when building PyTorch extensions that require CUDA. These errors depend on how CUDA is installed on your system, and this section focuses on PyTorch built with CUDA 10.2. + +For any other installation issues, please open an issue with the DeepSpeed team. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..dcfca5548c9d98f9fef2bc0305b3e7f0c773a002 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_1.txt @@ -0,0 +1,9 @@ +For any other installation issues, please open an issue with the DeepSpeed team. + +Non-identical CUDA toolkits +PyTorch comes with its own CUDA toolkit, but to use DeepSpeed with PyTorch, you need to have an identical version of CUDA installed system-wide. For example, if you installed PyTorch with cudatoolkit==10.2 in your Python environment, then you'll also need to have CUDA 10.2 installed system-wide. If you don't have CUDA installed system-wide, you should install it first. +The exact location may vary from system to system, but usr/local/cuda-10.2 is the most common location on many Unix systems. When CUDA is correctly setup and added to your PATH environment variable, you can find the installation location with the following command: + +which nvcc +Multiple CUDA toolkits +You may also have more than one CUDA toolkit installed system-wide. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_10.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_10.txt new file mode 100644 index 0000000000000000000000000000000000000000..b89e3ba7b2e5de8059189f3048b079a5516d9259 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_10.txt @@ -0,0 +1,106 @@ +[~debug_utils.DebugUnderflowOverflow] inserts hooks into the model that immediately after each +forward call will test input and output variables and also the corresponding module's weights. 
As soon as inf or +nan is detected in at least one element of the activations or weights, the program will assert and print a report +like this (this was caught with google/mt5-small under fp16 mixed precision): +Detected inf/nan during batch_number=0 +Last 21 forward frames: +abs min abs max metadata + encoder.block.1.layer.1.DenseReluDense.dropout Dropout +0.00e+00 2.57e+02 input[0] +0.00e+00 2.85e+02 output +[] + encoder.block.2.layer.0 T5LayerSelfAttention +6.78e-04 3.15e+03 input[0] +2.65e-04 3.42e+03 output[0] + None output[1] +2.25e-01 1.00e+04 output[2] + encoder.block.2.layer.1.layer_norm T5LayerNorm +8.69e-02 4.18e-01 weight +2.65e-04 3.42e+03 input[0] +1.79e-06 4.65e+00 output + encoder.block.2.layer.1.DenseReluDense.wi_0 Linear +2.17e-07 4.50e+00 weight +1.79e-06 4.65e+00 input[0] +2.68e-06 3.70e+01 output + encoder.block.2.layer.1.DenseReluDense.wi_1 Linear +8.08e-07 2.66e+01 weight +1.79e-06 4.65e+00 input[0] +1.27e-04 2.37e+02 output + encoder.block.2.layer.1.DenseReluDense.dropout Dropout +0.00e+00 8.76e+03 input[0] +0.00e+00 9.74e+03 output + encoder.block.2.layer.1.DenseReluDense.wo Linear +1.01e-06 6.44e+00 weight +0.00e+00 9.74e+03 input[0] +3.18e-04 6.27e+04 output + encoder.block.2.layer.1.DenseReluDense T5DenseGatedGeluDense +1.79e-06 4.65e+00 input[0] +3.18e-04 6.27e+04 output + encoder.block.2.layer.1.dropout Dropout +3.18e-04 6.27e+04 input[0] +0.00e+00 inf output +The example output has been trimmed in the middle for brevity. +The second column shows the value of the absolute largest element, so if you have a closer look at the last few frames, +the inputs and outputs were in the range of 1e4. So when this training was done under fp16 mixed precision the very +last step overflowed (since under fp16 the largest number before inf is 64e3). To avoid overflows under +fp16 the activations must remain way below 1e4, because 1e4 * 1e4 = 1e8 so any matrix multiplication with +large activations is going to lead to a numerical overflow condition. +At the very start of the trace you can discover at which batch number the problem occurred (here Detected inf/nan during batch_number=0 means the problem occurred on the first batch). +Each reported frame starts by declaring the fully qualified entry for the corresponding module this frame is reporting +for. If we look just at this frame: +encoder.block.2.layer.1.layer_norm T5LayerNorm +8.69e-02 4.18e-01 weight +2.65e-04 3.42e+03 input[0] +1.79e-06 4.65e+00 output +Here, encoder.block.2.layer.1.layer_norm indicates that it was a layer norm for the first layer, of the second +block of the encoder. And the specific calls of the forward is T5LayerNorm. +Let's look at the last few frames of that report: +Detected inf/nan during batch_number=0 +Last 21 forward frames: +abs min abs max metadata +[] + encoder.block.2.layer.1.DenseReluDense.wi_0 Linear +2.17e-07 4.50e+00 weight +1.79e-06 4.65e+00 input[0] +2.68e-06 3.70e+01 output + encoder.block.2.layer.1.DenseReluDense.wi_1 Linear +8.08e-07 2.66e+01 weight +1.79e-06 4.65e+00 input[0] +1.27e-04 2.37e+02 output + encoder.block.2.layer.1.DenseReluDense.wo Linear +1.01e-06 6.44e+00 weight +0.00e+00 9.74e+03 input[0] +3.18e-04 6.27e+04 output + encoder.block.2.layer.1.DenseReluDense T5DenseGatedGeluDense +1.79e-06 4.65e+00 input[0] +3.18e-04 6.27e+04 output + encoder.block.2.layer.1.dropout Dropout +3.18e-04 6.27e+04 input[0] +0.00e+00 inf output +The last frame reports for Dropout.forward function with the first entry for the only input and the second for the +only output. 
You can see that it was called from an attribute dropout inside the DenseReluDense class. We can see +that it happened during the first layer of the 2nd block, during the very first batch. Finally, the absolute largest +input element was 6.27e+04 and the same for the output was inf. +You can see here that T5DenseGatedGeluDense.forward resulted in output activations whose absolute max value was +around 62.7K, which is very close to fp16's top limit of 64K. In the next frame we have Dropout which renormalizes +the weights, after it zeroed some of the elements, which pushes the absolute max value to more than 64K, and we get an +overflow (inf). +As you can see, it's the previous frames that we need to look into when the numbers start becoming very large for fp16 +numbers. +Let's match the report to the code from models/t5/modeling_t5.py: +python +class T5DenseGatedGeluDense(nn.Module): + def __init__(self, config): + super().__init__() + self.wi_0 = nn.Linear(config.d_model, config.d_ff, bias=False) + self.wi_1 = nn.Linear(config.d_model, config.d_ff, bias=False) + self.wo = nn.Linear(config.d_ff, config.d_model, bias=False) + self.dropout = nn.Dropout(config.dropout_rate) + self.gelu_act = ACT2FN["gelu_new"] +def forward(self, hidden_states): + hidden_gelu = self.gelu_act(self.wi_0(hidden_states)) + hidden_linear = self.wi_1(hidden_states) + hidden_states = hidden_gelu * hidden_linear + hidden_states = self.dropout(hidden_states) + hidden_states = self.wo(hidden_states) + return hidden_states \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_11.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_11.txt new file mode 100644 index 0000000000000000000000000000000000000000..b065819884f262beffb42ddcee47ed11c62364ee --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_11.txt @@ -0,0 +1,22 @@ +Now it's easy to see the dropout call, and all the previous calls as well. +Since the detection is happening in a forward hook, these reports are printed immediately after each forward +returns. +Going back to the full report, to act on it and to fix the problem, we need to go a few frames up where the numbers +started to go up and most likely switch to the fp32 mode here, so that the numbers don't overflow when multiplied +or summed up. Of course, there might be other solutions.
For example, we could turn off amp temporarily if it's +enabled, after moving the original forward into a helper wrapper, like so: +python +def _forward(self, hidden_states): + hidden_gelu = self.gelu_act(self.wi_0(hidden_states)) + hidden_linear = self.wi_1(hidden_states) + hidden_states = hidden_gelu * hidden_linear + hidden_states = self.dropout(hidden_states) + hidden_states = self.wo(hidden_states) + return hidden_states +import torch +def forward(self, hidden_states): + if torch.is_autocast_enabled(): + with torch.cuda.amp.autocast(enabled=False): + return self._forward(hidden_states) + else: + return self._forward(hidden_states) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_12.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_12.txt new file mode 100644 index 0000000000000000000000000000000000000000..7b8dc3bcd6ddff9642fc72948f76b71e327e001f --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_12.txt @@ -0,0 +1,13 @@ +Since the automatic detector only reports on inputs and outputs of full frames, once you know where to look, you may +want to analyse the intermediary stages of any specific forward function as well. In such a case you can use the +detect_overflow helper function to inject the detector where you want it, for example: +python +from debug_utils import detect_overflow +class T5LayerFF(nn.Module): + [] +def forward(self, hidden_states): + forwarded_states = self.layer_norm(hidden_states) + detect_overflow(forwarded_states, "after layer_norm") + forwarded_states = self.DenseReluDense(forwarded_states) + detect_overflow(forwarded_states, "after DenseReluDense") + return hidden_states + self.dropout(forwarded_states) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_13.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_13.txt new file mode 100644 index 0000000000000000000000000000000000000000..97f45f626e83bdd87dda2ba7fe069a130b28229e --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_13.txt @@ -0,0 +1,9 @@ +You can see that we added 2 of these and now we track whether inf or nan was detected for forwarded_states +somewhere in between. +Actually, the detector already reports these because each of the calls in the example above is an nn.Module, but +if you had some local direct calculations, this is how you'd do that. +Additionally, if you're instantiating the debugger in your own code, you can adjust the number of frames printed from +its default, e.g.: +python +from transformers.debug_utils import DebugUnderflowOverflow +debug_overflow = DebugUnderflowOverflow(model, max_frames_to_save=100) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_14.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_14.txt new file mode 100644 index 0000000000000000000000000000000000000000..ecc20cf7a560c8c6d0101f0138d85524e1418f5a --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_14.txt @@ -0,0 +1,10 @@ +Specific batch absolute min and max value tracing +The same debugging class can be used for per-batch tracing with the underflow/overflow detection feature turned off. +Let's say you want to watch the absolute min and max values for all the ingredients of each forward call of a given +batch, and only do that for batches 1 and 3.
Then you instantiate this class as: +python +debug_overflow = DebugUnderflowOverflow(model, trace_batch_nums=[1, 3]) +And now full batches 1 and 3 will be traced using the same format as the underflow/overflow detector does. +Batches are 0-indexed. +This is helpful if you know that the program starts misbehaving after a certain batch number, so you can fast-forward +right to that area. Here is a sample truncated output for such configuration: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_15.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_15.txt new file mode 100644 index 0000000000000000000000000000000000000000..d02d1b6b3da5ed68ac5032a621bd4b241b0d9eb7 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_15.txt @@ -0,0 +1,26 @@ +*** Starting batch number=1 *** +abs min abs max metadata + shared Embedding +1.01e-06 7.92e+02 weight +0.00e+00 2.47e+04 input[0] +5.36e-05 7.92e+02 output +[] + decoder.dropout Dropout +1.60e-07 2.27e+01 input[0] +0.00e+00 2.52e+01 output + decoder T5Stack + not a tensor output + lm_head Linear +1.01e-06 7.92e+02 weight +0.00e+00 1.11e+00 input[0] +6.06e-02 8.39e+01 output + T5ForConditionalGeneration + not a tensor output + *** Starting batch number=3 *** + +abs min abs max metadata + shared Embedding +1.01e-06 7.92e+02 weight +0.00e+00 2.78e+04 input[0] +5.36e-05 7.92e+02 output +[] \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_16.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_16.txt new file mode 100644 index 0000000000000000000000000000000000000000..2fb363e3fccdfe84e37dc31b1e4adea1fe82df76 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_16.txt @@ -0,0 +1,7 @@ +Here you will get a huge number of frames dumped - as many as there were forward calls in your model, so it may or may +not what you want, but sometimes it can be easier to use for debugging purposes than a normal debugger. For example, if +a problem starts happening at batch number 150. So you can dump traces for batches 149 and 150 and compare where +numbers started to diverge. +You can also specify the batch number after which to stop the training, with: +python +debug_overflow = DebugUnderflowOverflow(model, trace_batch_nums=[1, 3], abort_after_batch_num=3) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..8c0c9783223b135f7e1440e98353a581d5dd1a33 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_2.txt @@ -0,0 +1,12 @@ +/usr/local/cuda-10.2 +/usr/local/cuda-11.0 +Typically, package installers set the paths to whatever the last version was installed. If the package build fails because it can't find the right CUDA version (despite it being installed system-wide already), then you need to configure the PATH and LD_LIBRARY_PATH environment variables to point to the correct path. +Take a look at the contents of these environment variables first: + +echo $PATH +echo $LD_LIBRARY_PATH +PATH lists the locations of the executables and LD_LIBRARY_PATH lists where to look for shared libraries. Earlier entries are prioritized over later ones, and : is used to separate multiple entries. To tell the build program where to find the specific CUDA toolkit you want, insert the correct path to list first. 
This command prepends rather than overwrites the existing values. +```bash +adjust the version and full path if needed +export PATH=/usr/local/cuda-10.2/bin:$PATH +export LD_LIBRARY_PATH=/usr/local/cuda-10.2/lib64:$LD_LIBRARY_PATH \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..d6c94c93cc3af489bd0e40606d6e5990ff8af712 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_3.txt @@ -0,0 +1,8 @@ +In addition, you should also check the directories you assign actually exist. The lib64 sub-directory contains various CUDA .so objects (like libcudart.so) and while it is unlikely your system names them differently, you should check the actual names and change them accordingly. +Older CUDA versions +Sometimes, older CUDA versions may refuse to build with newer compilers. For example, if you have gcc-9 but CUDA wants gcc-7. Usually, installing the latest CUDA toolkit enables support for the newer compiler. +You could also install an older version of the compiler in addition to the one you're currently using (or it may already be installed but it's not used by default and the build system can't see it). To resolve this, you can create a symlink to give the build system visibility to the older compiler. +```bash +adapt the path to your system +sudo ln -s /usr/bin/gcc-7 /usr/local/cuda-10.2/bin/gcc +sudo ln -s /usr/bin/g++-7 /usr/local/cuda-10.2/bin/g++ \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..071b5c526f2cd453214940101ef3bbac461ffdbe --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_4.txt @@ -0,0 +1,13 @@ +Prebuild +If you're still having issues with installing DeepSpeed or if you're building DeepSpeed at run time, you can try to prebuild the DeepSpeed modules before installing them. To make a local build for DeepSpeed: + +git clone https://github.com/microsoft/DeepSpeed/ +cd DeepSpeed +rm -rf build +TORCH_CUDA_ARCH_LIST="8.6" DS_BUILD_CPU_ADAM=1 DS_BUILD_UTILS=1 pip install . \ +--global-option="build_ext" --global-option="-j8" --no-cache -v \ +--disable-pip-version-check 2>&1 | tee build.log + +To use NVMe offload, add the DS_BUILD_AIO=1 parameter to the build command and make sure you install the libaio-dev package system-wide. + +Next, you'll have to specify your GPU's architecture by editing the TORCH_CUDA_ARCH_LIST variable (find a complete list of NVIDIA GPUs and their corresponding architectures on this page). 
To check the PyTorch version that corresponds to your architecture, run the following command: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..a6b03367754a84456a63f6a31b54278cda890290 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_5.txt @@ -0,0 +1,11 @@ +python -c "import torch; print(torch.cuda.get_arch_list())" +Find the architecture for a GPU with the following command: + +CUDA_VISIBLE_DEVICES=0 python -c "import torch; print(torch.cuda.get_device_capability())" + +To find the architecture for GPU 0: + +CUDA_VISIBLE_DEVICES=0 python -c "import torch; \ +print(torch.cuda.get_device_properties(torch.device('cuda'))) +"_CudaDeviceProperties(name='GeForce RTX 3090', major=8, minor=6, total_memory=24268MB, multi_processor_count=82)" +This means your GPU architecture is 8.6. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..d52adf26c194a53ae5b3fa80b61cec83d122b9ce --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_6.txt @@ -0,0 +1,10 @@ +If you get 8, 6, then you can set TORCH_CUDA_ARCH_LIST="8.6". For multiple GPUs with different architectures, list them like TORCH_CUDA_ARCH_LIST="6.1;8.6". +It is also possible to not specify TORCH_CUDA_ARCH_LIST and the build program automatically queries the GPU architecture of the build. However, it may or may not match the actual GPU on the target machine which is why it is better to explicitly specify the correct architecture. +For training on multiple machines with the same setup, you'll need to make a binary wheel: + +git clone https://github.com/microsoft/DeepSpeed/ +cd DeepSpeed +rm -rf build +TORCH_CUDA_ARCH_LIST="8.6" DS_BUILD_CPU_ADAM=1 DS_BUILD_UTILS=1 \ +python setup.py build_ext -j8 bdist_wheel +This command generates a binary wheel that'll look something like dist/deepspeed-0.3.13+8cd046f-cp38-cp38-linux_x86_64.whl. Now you can install this wheel locally or on another machine. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_7.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_7.txt new file mode 100644 index 0000000000000000000000000000000000000000..a698cc7517616d609e21959ebefec12a67c947e5 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_7.txt @@ -0,0 +1,12 @@ +pip install deepspeed-0.3.13+8cd046f-cp38-cp38-linux_x86_64.whl +Multi-GPU Network Issues Debug +When training or inferencing with DistributedDataParallel and multiple GPU, if you run into issue of inter-communication between processes and/or nodes, you can use the following script to diagnose network issues. + +wget https://raw.githubusercontent.com/huggingface/transformers/main/scripts/distributed/torch-distributed-gpu-test.py +For example to test how 2 GPUs interact do: + +python -m torch.distributed.run --nproc_per_node 2 --nnodes 1 torch-distributed-gpu-test.py +If both processes can talk to each and allocate GPU memory each will print an OK status. +For more GPUs or nodes adjust the arguments in the script. +You will find a lot more details inside the diagnostics script and even a recipe to how you could run it in a SLURM environment. 
+An additional level of debugging is to add the NCCL_DEBUG=INFO environment variable as follows: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_8.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_8.txt new file mode 100644 index 0000000000000000000000000000000000000000..1c579e8a03734c9830be45507dda81a3b0db507c --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_8.txt @@ -0,0 +1,14 @@ +NCCL_DEBUG=INFO python -m torch.distributed.run --nproc_per_node 2 --nnodes 1 torch-distributed-gpu-test.py +This will dump a lot of NCCL-related debug information, which you can then search online if you find that some problems are reported. If you're not sure how to interpret the output, you can share the log file in an Issue. +Underflow and Overflow Detection + +This feature is currently available for PyTorch only. + +For multi-GPU training it requires DDP (torch.distributed.launch). + +This feature can be used with any nn.Module-based model. + +If you start getting loss=NaN or the model exhibits some other abnormal behavior due to inf or nan in +activations or weights, you need to discover where the first underflow or overflow happens and what led to it. Luckily +you can accomplish that easily by activating a special module that will do the detection automatically. +If you're using [Trainer], you just need to add: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_9.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_9.txt new file mode 100644 index 0000000000000000000000000000000000000000..4bc16572b26a282ebf23ebd79b4ca6bee8f671a7 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_debugging.txt_chunk_9.txt @@ -0,0 +1,7 @@ +--debug underflow_overflow +to the normal command line arguments, or pass debug="underflow_overflow" when creating the +[TrainingArguments] object. +If you're using your own training loop or another Trainer, you can accomplish the same with: +python +from transformers.debug_utils import DebugUnderflowOverflow +debug_overflow = DebugUnderflowOverflow(model) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..b99d61d5bda88d2048d0b488eb73910c702c65f4 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_0.txt @@ -0,0 +1,6 @@ +DeepSpeed +DeepSpeed is a PyTorch optimization library that makes distributed training memory-efficient and fast. At its core is the Zero Redundancy Optimizer (ZeRO), which enables training large models at scale. ZeRO works in several stages: + +ZeRO-1, optimizer state partitioning across GPUs +ZeRO-2, gradient partitioning across GPUs +ZeRO-3, parameter partitioning across GPUs \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..81e63d20229e529c36021281ea1d099e58611f96 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_1.txt @@ -0,0 +1,4 @@ +In GPU-limited environments, ZeRO also enables offloading optimizer memory and computation from the GPU to the CPU to fit and train really large models on a single GPU. DeepSpeed is integrated with the Transformers [Trainer] class for all ZeRO stages and offloading.
All you need to do is provide a config file or you can use a provided template. For inference, Transformers support ZeRO-3 and offloading since it allows loading huge models. +This guide will walk you through how to deploy DeepSpeed training, the features you can enable, how to setup the config files for different ZeRO stages, offloading, inference, and using DeepSpeed without the [Trainer]. +Installation +DeepSpeed is available to install from PyPI or Transformers (for more detailed installation options, take a look at the DeepSpeed installation details or the GitHub README). \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_10.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_10.txt new file mode 100644 index 0000000000000000000000000000000000000000..e8140ea405106280ff18c38281cd48c8a55ff758 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_10.txt @@ -0,0 +1,4 @@ +offload_optimizer should be enabled to reduce GPU memory usage. +overlap_comm when set to true trades off increased GPU memory usage to lower allreduce latency. This feature uses 4.5x the allgather_bucket_size and reduce_bucket_size values. In this example, they're set to 5e8 which means it requires 9GB of GPU memory. If your GPU memory is 8GB or less, you should reduce overlap_comm to lower the memory requirements and prevent an out-of-memory (OOM) error. +allgather_bucket_size and reduce_bucket_size trade off available GPU memory for communication speed. The smaller their values, the slower communication is and the more GPU memory is available. You can balance, for example, whether a bigger batch size is more important than a slightly slower training time. +round_robin_gradients is available in DeepSpeed 0.4.4 for CPU offloading. It parallelizes gradient copying to CPU memory among ranks by fine-grained gradient partitioning. Performance benefit grows with gradient accumulation steps (more copying between optimizer steps) or GPU count (increased parallelism). \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_11.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_11.txt new file mode 100644 index 0000000000000000000000000000000000000000..eb7e773ac0f193a4c3108227823082c97a898a4c --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_11.txt @@ -0,0 +1,19 @@ +yml +{ + "zero_optimization": { + "stage": 2, + "offload_optimizer": { + "device": "cpu", + "pin_memory": true + }, + "allgather_partitions": true, + "allgather_bucket_size": 5e8, + "overlap_comm": true, + "reduce_scatter": true, + "reduce_bucket_size": 5e8, + "contiguous_gradients": true + "round_robin_gradients": true + } +} + +ZeRO-3 shards the optimizer, gradient, and parameters across GPUs. Unlike ZeRO-2, ZeRO-3 can also be used for inference, in addition to training, because it allows large models to be loaded on multiple GPUs. Some important parameters to configure include: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_12.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_12.txt new file mode 100644 index 0000000000000000000000000000000000000000..10964e649a7ad8f90dcbe4ef12f2199ee158b844 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_12.txt @@ -0,0 +1,5 @@ +device: "cpu" can help if you're running out of GPU memory and if you have free CPU memory available. This allows offloading model parameters to the CPU. 
+pin_memory: true can improve throughput, but less memory becomes available for other processes because the pinned memory is reserved for the specific process that requested it and it's typically accessed much faster than normal CPU memory. +stage3_max_live_parameters is the upper limit on how many full parameters you want to keep on the GPU at any given time. Reduce this value if you encounter an OOM error. +stage3_max_reuse_distance is a value for determining when a parameter is used again in the future, and it helps decide whether to throw the parameter away or to keep it. If the parameter is going to be reused (if the value is less than stage3_max_reuse_distance), then it is kept to reduce communication overhead. This is super helpful when activation checkpointing is enabled and you want to keep the parameter in the forward recompute until the backward pass. But reduce this value if you encounter an OOM error. +stage3_gather_16bit_weights_on_model_save consolidates fp16 weights when a model is saved. For large models and multiple GPUs, this is an expensive in terms of memory and speed. You should enable it if you're planning on resuming training. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_13.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_13.txt new file mode 100644 index 0000000000000000000000000000000000000000..d5be445330fefb065eb5bcb4b38dc00d2955dc7b --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_13.txt @@ -0,0 +1,4 @@ +sub_group_size controls which parameters are updated during the optimizer step. Parameters are grouped into buckets of sub_group_size and each bucket is updated one at a time. When used with NVMe offload, sub_group_size determines when model states are moved in and out of CPU memory from during the optimization step. This prevents running out of CPU memory for extremely large models. sub_group_size can be left to its default value if you aren't using NVMe offload, but you may want to change it if you: + +Run into an OOM error during the optimizer step. In this case, reduce sub_group_size to reduce memory usage of the temporary buffers. +The optimizer step is taking a really long time. In this case, increase sub_group_size to improve bandwidth utilization as a result of increased data buffers. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_14.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_14.txt new file mode 100644 index 0000000000000000000000000000000000000000..b07c315e70279a4db95d949a639f1505080f79aa --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_14.txt @@ -0,0 +1,26 @@ +reduce_bucket_size, stage3_prefetch_bucket_size, and stage3_param_persistence_threshold are dependent on a model's hidden size. It is recommended to set these values to auto and allow the [Trainer] to automatically assign the values. 
+ +yml +{ + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "cpu", + "pin_memory": true + }, + "offload_param": { + "device": "cpu", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1e9, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1e9, + "stage3_max_reuse_distance": 1e9, + "stage3_gather_16bit_weights_on_model_save": true + } +} +You can use the deepspeed.zero.Init context manager to initialize a model faster: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_15.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_15.txt new file mode 100644 index 0000000000000000000000000000000000000000..2a6197d640112bd8072ab429af13bcc50b71432d --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_15.txt @@ -0,0 +1,12 @@ +from transformers import T5ForConditionalGeneration, T5Config +import deepspeed +with deepspeed.zero.Init(): + config = T5Config.from_pretrained("google-t5/t5-small") + model = T5ForConditionalGeneration(config) + +For pretrained models, the DeepSpeed config file needs to have is_deepspeed_zero3_enabled: true set up in [TrainingArguments] and it needs a ZeRO configuration enabled. The [TrainingArguments] object must be created before calling the model [~PreTrainedModel.from_pretrained]. + +from transformers import AutoModel, Trainer, TrainingArguments +training_args = TrainingArguments(..., deepspeed=ds_config) +model = AutoModel.from_pretrained("google-t5/t5-small") +trainer = Trainer(model=model, args=training_args, ...) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_16.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_16.txt new file mode 100644 index 0000000000000000000000000000000000000000..ded41bc8c4f56c83f2ad41328257c3b971b9b309 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_16.txt @@ -0,0 +1,5 @@ +You'll need ZeRO-3 if the fp16 weights don't fit on a single GPU. If you're able to load fp16 weights, then make sure you specify torch_dtype=torch.float16 in [~PreTrainedModel.from_pretrained]. +Another consideration for ZeRO-3 is that if you have multiple GPUs, no single GPU has all the parameters unless they're the parameters of the currently executing layer. To access all parameters from all the layers at once, such as loading pretrained model weights in [~PreTrainedModel.from_pretrained], one layer is loaded at a time and immediately partitioned to all GPUs. This is because for very large models, it isn't possible to load the weights on one GPU and then distribute them across the other GPUs due to memory limitations. +If you encounter a model parameter weight that looks like the following, where the value is tensor([1.]) or the parameter size is 1 instead of a larger multi-dimensional shape, this means the parameter is partitioned and this is a ZeRO-3 placeholder.
+py +tensor([1.0], device="cuda:0", dtype=torch.float16, requires_grad=True) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_17.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_17.txt new file mode 100644 index 0000000000000000000000000000000000000000..907e2406624fbea4ee82747ff3026bbba1269723 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_17.txt @@ -0,0 +1 @@ +For more information about initializing large models with ZeRO-3 and accessing the parameters, take a look at the Constructing Massive Models and Gathering Parameters guides. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_18.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_18.txt new file mode 100644 index 0000000000000000000000000000000000000000..14df98911aafd2d0361c0e72dcd4e12506beeecb --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_18.txt @@ -0,0 +1,23 @@ +NVMe configuration +ZeRO-Infinity allows offloading model states to the CPU and/or NVMe to save even more memory. Smart partitioning and tiling algorithms allow each GPU to send and receive very small amounts of data during offloading such that a modern NVMe can fit an even larger total memory pool than is available to your training process. ZeRO-Infinity requires ZeRO-3. +Depending on the CPU and/or NVMe memory available, you can offload both the optimizer states and parameters, just one of them, or none. You should also make sure the nvme_path is pointing to an NVMe device, because while it still works with a normal hard drive or solid state drive, it'll be significantly slower. With a modern NVMe, you can expect peak transfer speeds of ~3.5GB/s for read and ~3GB/s for write operations. Lastly, run a benchmark on your training setup to determine the optimal aio configuration. +The example ZeRO-3/Infinity configuration file below sets most of the parameter values to auto, but you could also manually add these values. +```yml +{ + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, +"optimizer": { + "type": "AdamW", + "params": { + "lr": "auto", + "betas": "auto", + "eps": "auto", + "weight_decay": "auto" + } +}, \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_19.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_19.txt new file mode 100644 index 0000000000000000000000000000000000000000..1e252c6a7204ad698b4011aa91a1036de6845425 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_19.txt @@ -0,0 +1,8 @@ +"scheduler": { + "type": "WarmupLR", + "params": { + "warmup_min_lr": "auto", + "warmup_max_lr": "auto", + "warmup_num_steps": "auto" + } +}, \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..7249dc71dc43837d84cfc4b7f135f67e140a7984 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_2.txt @@ -0,0 +1,8 @@ +If you're having difficulties installing DeepSpeed, check the DeepSpeed CUDA installation guide. 
While DeepSpeed has a pip installable PyPI package, it is highly recommended to install it from source to best match your hardware and to support certain features, like 1-bit Adam, which aren’t available in the PyPI distribution. + +pip install deepspeed + +pip install transformers[deepspeed] + +Memory requirements +Before you begin, it is a good idea to check whether you have enough GPU and CPU memory to fit your model. DeepSpeed provides a tool for estimating the required CPU/GPU memory. For example, to estimate the memory requirements for the bigscience/T0_3B model on a single GPU: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_20.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_20.txt new file mode 100644 index 0000000000000000000000000000000000000000..b21cddc20c0cd5b19156630b5599703300dd1112 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_20.txt @@ -0,0 +1,34 @@ +"zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "nvme", + "nvme_path": "/local_nvme", + "pin_memory": true, + "buffer_count": 4, + "fast_init": false + }, + "offload_param": { + "device": "nvme", + "nvme_path": "/local_nvme", + "pin_memory": true, + "buffer_count": 5, + "buffer_size": 1e8, + "max_in_cpu": 1e9 + }, + "aio": { + "block_size": 262144, + "queue_depth": 32, + "thread_count": 1, + "single_submit": false, + "overlap_events": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1e9, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1e9, + "stage3_max_reuse_distance": 1e9, + "stage3_gather_16bit_weights_on_model_save": true +}, \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_21.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_21.txt new file mode 100644 index 0000000000000000000000000000000000000000..f4ced0a273d4839cbdf4c9401fccdf363a6f10e8 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_21.txt @@ -0,0 +1,13 @@ +"gradient_accumulation_steps": "auto", +"gradient_clipping": "auto", +"steps_per_print": 2000, +"train_batch_size": "auto", +"train_micro_batch_size_per_gpu": "auto", +"wall_clock_breakdown": false + +} + +DeepSpeed features +There are a number of important parameters to specify in the DeepSpeed configuration file which are briefly described in this section. +Activation/gradient checkpointing +Activation and gradient checkpointing trades speed for more GPU memory which allows you to overcome scenarios where your GPU is out of memory or to increase your batch size for better performance. To enable this feature: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_22.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_22.txt new file mode 100644 index 0000000000000000000000000000000000000000..b75abcd658cf5f74f0d0ad7afeeb90d9e61f6421 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_22.txt @@ -0,0 +1,5 @@ +For a Hugging Face model, set model.gradient_checkpointing_enable() or --gradient_checkpointing in the [Trainer]. +For a non-Hugging Face model, use the DeepSpeed Activation Checkpointing API. You could also replace the Transformers modeling code and replace torch.utils.checkpoint with the DeepSpeed API. 
This approach is more flexible because you can offload the forward activations to the CPU memory instead of recalculating them. + +Optimizer and scheduler +The DeepSpeed and Transformers optimizers and schedulers can be mixed and matched as long as you don't enable offload_optimizer. When offload_optimizer is enabled, you could use a non-DeepSpeed optimizer (except for LAMB) as long as it has both a CPU and GPU implementation. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_23.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_23.txt new file mode 100644 index 0000000000000000000000000000000000000000..fcd51e610ecea2c387577a26b94763b57cef1848 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_23.txt @@ -0,0 +1 @@ +The optimizer and scheduler parameters for the config file can be set from the command line to avoid hard-to-find errors. For example, if the learning rate is set to a different value in another place, you can override it from the command line. Aside from the optimizer and scheduler parameters, you'll need to ensure your [Trainer] command line arguments match the DeepSpeed configuration. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_24.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_24.txt new file mode 100644 index 0000000000000000000000000000000000000000..111f18e7d227eaa9645d6bb5782ad89ffb34cc84 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_24.txt @@ -0,0 +1,24 @@ +DeepSpeed offers several optimizers (Adam, AdamW, OneBitAdam, and LAMB) but you can also import other optimizers from PyTorch. If you don't configure the optimizer in the config, the [Trainer] automatically selects AdamW and either uses the supplied values or the default values for the following parameters from the command line: lr, adam_beta1, adam_beta2, adam_epsilon, weight_decay. +You can set the parameters to "auto" or manually input your own desired values. +yaml +{ + "optimizer": { + "type": "AdamW", + "params": { + "lr": "auto", + "betas": "auto", + "eps": "auto", + "weight_decay": "auto" + } + } +} +You can also use an unsupported optimizer by adding the following to the top-level configuration. +yaml +{ + "zero_allow_untested_optimizer": true +} +From DeepSpeed==0.8.3 on, if you want to use offload, you'll also need to add the following to the top-level configuration because offload works best with DeepSpeed's CPU Adam optimizer. +yaml +{ + "zero_force_ds_cpu_optimizer": false +} \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_25.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_25.txt new file mode 100644 index 0000000000000000000000000000000000000000..4b923d67c9e4e29a8ffeb6cb96c10e2bfb7ab585 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_25.txt @@ -0,0 +1,5 @@ +DeepSpeed supports the LRRangeTest, OneCycle, WarmupLR and WarmupDecayLR learning rate schedulers.
+Transformers and DeepSpeed provide two of the same schedulers: + +WarmupLR is the same as --lr_scheduler_type constant_with_warmup in Transformers +WarmupDecayLR is the same as --lr_scheduler_type linear in Transformers (this is the default scheduler used in Transformers) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_26.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_26.txt new file mode 100644 index 0000000000000000000000000000000000000000..6b4ef1d873e9cc668e9684021ea65af4fb773ecc --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_26.txt @@ -0,0 +1,17 @@ +If you don't configure the scheduler in the config, the [Trainer] automatically selects WarmupDecayLR and either uses the supplied values or the default values for the following parameters from the command line: warmup_min_lr, warmup_max_lr, warmup_num_steps, total_num_steps (automatically calculated during run time if max_steps is not provided). +You can set the parameters to "auto" or manually input your own desired values. +yaml +{ + "scheduler": { + "type": "WarmupDecayLR", + "params": { + "total_num_steps": "auto", + "warmup_min_lr": "auto", + "warmup_max_lr": "auto", + "warmup_num_steps": "auto" + } + } +} + +Precision +DeepSpeed supports fp32, fp16, and bf16 mixed precision. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_27.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_27.txt new file mode 100644 index 0000000000000000000000000000000000000000..d62f59d8d0fbe5b7aad0f3ecd926371459f6e079 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_27.txt @@ -0,0 +1,11 @@ +Precision +DeepSpeed supports fp32, fp16, and bf16 mixed precision. + +If your model doesn't work well with mixed precision, for example if it wasn't pretrained in mixed precision, you may encounter overflow or underflow issues which can cause NaN loss. For these cases, you should use full fp32 precision by explicitly disabling the default fp16 mode. +yaml +{ + "fp16": { + "enabled": false + } +} +For Ampere GPUs and PyTorch > 1.7, PyTorch automatically switches to the more efficient tf32 format for some operations, but the results are still in fp32. You can control it from the [Trainer] by setting --tf32 to enable it, and --tf32 0 or --no_tf32 to disable it. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_28.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_28.txt new file mode 100644 index 0000000000000000000000000000000000000000..6a85908e13de0df889f83374a4e271656805b00c --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_28.txt @@ -0,0 +1,21 @@ +PyTorch AMP-like fp16 mixed precision reduces memory usage and accelerates training speed. [Trainer] automatically enables or disables fp16 based on the value of args.fp16_backend, and the rest of the config can be set by you. fp16 is enabled from the command line when the following arguments are passed: --fp16, --fp16_backend amp or --fp16_full_eval. +yaml +{ + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + } +} +For additional DeepSpeed fp16 training options, take a look at the FP16 Training Options reference. +To configure Apex-like fp16 mixed precision, set up the config as shown below with "auto" or your own values.
[Trainer] automatically configures amp based on the values of args.fp16_backend and args.fp16_opt_level. It can also be enabled from the command line when the following arguments are passed: --fp16, --fp16_backend apex or --fp16_opt_level O1. +yaml +{ + "amp": { + "enabled": "auto", + "opt_level": "auto" + } +} \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_29.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_29.txt new file mode 100644 index 0000000000000000000000000000000000000000..4335a03f290a98ba3163201edae32ed0fe28a2ac --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_29.txt @@ -0,0 +1,8 @@ +To use bf16, you'll need at least DeepSpeed==0.6.0. bf16 has the same dynamic range as fp32 and doesn’t require loss scaling. However, if you use gradient accumulation with bf16, gradients are accumulated in bf16 which may not be desired because this format's low precision can lead to lossy accumulation. +bf16 can be set up in the config file or enabled from the command line when the following arguments are passed: --bf16 or --bf16_full_eval. +yaml +{ + "bf16": { + "enabled": "auto" + } +} \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..5c5d79baad8e9a16d8a101951bc5c3aa577b8810 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_3.txt @@ -0,0 +1,28 @@ +$ python -c 'from transformers import AutoModel; \ +from deepspeed.runtime.zero.stage3 import estimate_zero3_model_states_mem_needs_all_live; \ +model = AutoModel.from_pretrained("bigscience/T0_3B"); \ +estimate_zero3_model_states_mem_needs_all_live(model, num_gpus_per_node=1, num_nodes=1)' +[] +Estimated memory needed for params, optim states and gradients for a: +HW: Setup with 1 node, 1 GPU per node. +SW: Model with 2783M total params, 65M largest layer params. + per CPU | per GPU | Options + 70.00GB | 0.25GB | offload_param=cpu , offload_optimizer=cpu , zero_init=1 + 70.00GB | 0.25GB | offload_param=cpu , offload_optimizer=cpu , zero_init=0 + 62.23GB | 5.43GB | offload_param=none, offload_optimizer=cpu , zero_init=1 + 62.23GB | 5.43GB | offload_param=none, offload_optimizer=cpu , zero_init=0 + 0.37GB | 46.91GB | offload_param=none, offload_optimizer=none, zero_init=1 + 15.56GB | 46.91GB | offload_param=none, offload_optimizer=none, zero_init=0 +This means you either need a single 80GB GPU without CPU offload or an 8GB GPU and ~60GB of CPU memory to offload to (these are just the memory requirements for the parameters, optimizer states and gradients, and you'll need a bit more for the CUDA kernels and activations). You should also consider the tradeoff between cost and speed because it'll be cheaper to rent or buy a smaller GPU but it'll take longer to train your model. +If you have enough GPU memory, make sure to disable CPU/NVMe offload to make everything faster. +Select a ZeRO stage +After you've installed DeepSpeed and have a better idea of your memory requirements, the next step is selecting a ZeRO stage to use.
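If your hardware differs from this single-GPU example, it can help to re-run the estimator for your actual GPU count before picking a stage. A minimal sketch reusing the same helper shown above (model kept from the example; adjust num_gpus_per_node and num_nodes to your setup):
```py
# Re-run the memory estimator from the example above for a different setup,
# e.g. one node with 2 GPUs, to see how sharding changes the per-GPU requirements.
from transformers import AutoModel
from deepspeed.runtime.zero.stage3 import estimate_zero3_model_states_mem_needs_all_live

model = AutoModel.from_pretrained("bigscience/T0_3B")
estimate_zero3_model_states_mem_needs_all_live(model, num_gpus_per_node=2, num_nodes=1)
```
With the numbers for your own hardware in hand, you can pick a stage from the ranking below.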
In order of fastest and most memory-efficient: +| Fastest | Memory efficient | +|------------------|------------------| +| ZeRO-1 | ZeRO-3 + offload | +| ZeRO-2 | ZeRO-3 | +| ZeRO-2 + offload | ZeRO-2 + offload | +| ZeRO-3 | ZeRO-2 | +| ZeRO-3 + offload | ZeRO-1 | +To find what works best for you, start with the fastest approach and if you run out of memory, try the next stage which is slower but more memory efficient. Feel free to work in whichever direction you prefer (starting with the most memory efficient or fastest) to discover the appropriate balance between speed and memory usage. +A general process you can use is (start with batch size of 1): \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_30.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_30.txt new file mode 100644 index 0000000000000000000000000000000000000000..9d91bb98cc1715b40d73627691d5e8d401a60898 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_30.txt @@ -0,0 +1,13 @@ +Batch size +The batch size can be auto-configured or explicitly set. If you choose to use the "auto" option, [Trainer] sets train_micro_batch_size_per_gpu to the value of args.per_device_train_batch_size and train_batch_size to args.world_size * args.per_device_train_batch_size * args.gradient_accumulation_steps. +yaml +{ + "train_micro_batch_size_per_gpu": "auto", + "train_batch_size": "auto" +} +Gradient accumulation +Gradient accumulation can be auto-configured or explicitly set. If you choose to use the "auto" option, [Trainer] sets it to the value of args.gradient_accumulation_steps. +```yaml +{ + "gradient_accumulation_steps": "auto" +} \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_31.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_31.txt new file mode 100644 index 0000000000000000000000000000000000000000..adf10ebd26349305b0ad376aecb2599ba40b3f24 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_31.txt @@ -0,0 +1,18 @@ +Gradient clipping +Gradient clipping can be auto-configured or explicitly set. If you choose to use the "auto" option, [Trainer] sets it to the value of args.max_grad_norm. +yaml +{ + "gradient_clipping": "auto" +} +Communication data type +For communication collectives like reduction, gathering and scattering operations, a separate data type is used. +All gather and scatter operations are performed in the same data type the data is in. For example, if you're training with bf16, the data is also gathered in bf16 because gathering is a non-lossy operation. +Reduce operations are lossy, for example when gradients are averaged across multiple GPUs. When the communication is done in fp16 or bf16, it is more likely to be lossy because adding multiple numbers in low precision isn't exact. This is especially the case with bf16 which has a lower precision than fp16. For this reason, fp16 is the default for reduction operations because the loss is minimal when averaging gradients. +You can choose the communication data type by setting the communication_data_type parameter in the config file. For example, choosing fp32 adds a small amount of overhead but ensures the reduction operation is accumulated in fp32 and when it is ready, it is downcasted to whichever half-precision dtype you're training in. +yaml +{ + "communication_data_type": "fp32" +} +Deployment +DeepSpeed can be deployed by different launchers such as torchrun, the deepspeed launcher, or Accelerate. 
To deploy, add --deepspeed ds_config.json to the [Trainer] command line. It’s recommended to use DeepSpeed’s add_config_arguments utility to add any necessary command line arguments to your code. +This guide will show you how to deploy DeepSpeed with the deepspeed launcher for different training setups. You can check out this post for more practical usage examples. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_32.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_32.txt new file mode 100644 index 0000000000000000000000000000000000000000..b7d704b38373a3fd57308cc385813ea1121b4ee8 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_32.txt @@ -0,0 +1,11 @@ +To deploy DeepSpeed on multiple GPUs, add the --num_gpus parameter. If you want to use all available GPUs, you don't need to add --num_gpus. The example below uses 2 GPUs. + +deepspeed --num_gpus=2 examples/pytorch/translation/run_translation.py \ +--deepspeed tests/deepspeed/ds_config_zero3.json \ +--model_name_or_path google-t5/t5-small --per_device_train_batch_size 1 \ +--output_dir output_dir --overwrite_output_dir --fp16 \ +--do_train --max_train_samples 500 --num_train_epochs 1 \ +--dataset_name wmt16 --dataset_config "ro-en" \ +--source_lang en --target_lang ro + +To deploy DeepSpeed on a single GPU, add the --num_gpus parameter. It isn't necessary to explicitly set this value if you only have 1 GPU because DeepSpeed deploys all GPUs it can see on a given node. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_33.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_33.txt new file mode 100644 index 0000000000000000000000000000000000000000..e4dd0e98757ccc314da0aebd7302e48d17c658d4 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_33.txt @@ -0,0 +1,13 @@ +deepspeed --num_gpus=1 examples/pytorch/translation/run_translation.py \ +--deepspeed tests/deepspeed/ds_config_zero2.json \ +--model_name_or_path google-t5/t5-small --per_device_train_batch_size 1 \ +--output_dir output_dir --overwrite_output_dir --fp16 \ +--do_train --max_train_samples 500 --num_train_epochs 1 \ +--dataset_name wmt16 --dataset_config "ro-en" \ +--source_lang en --target_lang ro +DeepSpeed is still useful with just 1 GPU because you can: + +Offload some computations and memory to the CPU to make more GPU resources available to your model, so you can use a larger batch size or fit a very large model that normally won't fit. +Minimize memory fragmentation with its smart GPU memory management system, which also allows you to fit bigger models and data batches. + +Set the allgather_bucket_size and reduce_bucket_size values to 2e8 in the ZeRO-2 configuration file to get better performance on a single GPU. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_34.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_34.txt new file mode 100644 index 0000000000000000000000000000000000000000..6c34c5d588939e68ab3e261a3dca8f38c037d2ba --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_34.txt @@ -0,0 +1,10 @@ +Multi-node deployment +A node is one or more GPUs for running a workload. A more powerful setup is a multi-node setup which can be launched with the deepspeed launcher. For this guide, let's assume there are two nodes with 8 GPUs each. The first node can be accessed with ssh hostname1 and the second node with ssh hostname2.
Both nodes must be able to communicate with each other locally over ssh without a password. +By default, DeepSpeed expects your multi-node environment to use a shared storage. If this is not the case and each node can only see the local filesystem, you need to adjust the config file to include a checkpoint to allow loading without access to a shared filesystem: +yaml +{ + "checkpoint": { + "use_node_local_storage": true + } +} +You could also use the [Trainer]'s --save_on_each_node argument to automatically add the above checkpoint to your config. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_35.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_35.txt new file mode 100644 index 0000000000000000000000000000000000000000..8c9fd5ba1862f2208dbf440d9a46af158b91966d --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_35.txt @@ -0,0 +1,14 @@ +For torchrun, you have to ssh to each node and run the following command on both of them. The launcher waits until both nodes are synchronized before launching the training. + +torchrun --nproc_per_node=8 --nnodes=2 --node_rank=0 --master_addr=hostname1 \ +--master_port=9901 your_program.py --deepspeed ds_config.json + +For the deepspeed launcher, start by creating a hostfile. + +hostname1 slots=8 +hostname2 slots=8 +Then you can launch the training with the following command. The deepspeed launcher automatically launches the command on both nodes at once. + +deepspeed --num_gpus 8 --num_nodes 2 --hostfile hostfile --master_addr hostname1 --master_port=9901 \ +your_program.py --deepspeed ds_config.json +Check out the Resource Configuration (multi-node) guide for more details about configuring multi-node compute resources. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_36.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_36.txt new file mode 100644 index 0000000000000000000000000000000000000000..8c84b48d98518a7745a77f826ed1756bfa5a3112 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_36.txt @@ -0,0 +1,17 @@ +SLURM +In a SLURM environment, you'll need to adapt the SLURM script below to your specific environment. An example SLURM script may look like: +```bash +#SBATCH --job-name=test-nodes # name +#SBATCH --nodes=2 # nodes +#SBATCH --ntasks-per-node=1 # crucial - only 1 task per dist per node! +#SBATCH --cpus-per-task=10 # number of cores per task +#SBATCH --gres=gpu:8 # number of gpus +#SBATCH --time 20:00:00 # maximum execution time (HH:MM:SS) +#SBATCH --output=%x-%j.out # output file name +export GPUS_PER_NODE=8 +export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) +export MASTER_PORT=9901 +srun --jobid $SLURM_JOBID bash -c 'python -m torch.distributed.run \ + --nproc_per_node $GPUS_PER_NODE --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \ + --master_addr $MASTER_ADDR --master_port $MASTER_PORT \ +your_program.py --deepspeed ds_config.json' \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_37.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_37.txt new file mode 100644 index 0000000000000000000000000000000000000000..5c2acbdac1e5e98cd57f8409fc990c27bc94a0f9 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_37.txt @@ -0,0 +1,5 @@ +Then you can schedule your multi-node deployment with the following command, which launches training simultaneously on all nodes.
+ +sbatch launch.slurm +Notebook +The deepspeed launcher doesn't support deployment from a notebook so you'll need to emulate the distributed environment. However, this only works for 1 GPU. If you want to use more than 1 GPU, you must use a multi-process environment for DeepSpeed to work. This means you have to use the deepspeed launcher which can't be emulated as shown here. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_38.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_38.txt new file mode 100644 index 0000000000000000000000000000000000000000..d290f8bac1da361b4143e0da660c2460f67d10c3 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_38.txt @@ -0,0 +1,14 @@ +DeepSpeed requires a distributed environment even when only one process is used. +This emulates a launcher in the notebook +import os +os.environ["MASTER_ADDR"] = "localhost" +os.environ["MASTER_PORT"] = "9994" # modify if RuntimeError: Address already in use +os.environ["RANK"] = "0" +os.environ["LOCAL_RANK"] = "0" +os.environ["WORLD_SIZE"] = "1" +Now proceed as normal, plus pass the DeepSpeed config file +training_args = TrainingArguments(, deepspeed="ds_config_zero3.json") +trainer = Trainer() +trainer.train() + +If you want to create the config file on the fly in the notebook in the current directory, you could have a dedicated cell. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_39.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_39.txt new file mode 100644 index 0000000000000000000000000000000000000000..8533943a7f0cc78a57a8f76f6bda80ba68af4211 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_39.txt @@ -0,0 +1,29 @@ +%%bash +cat <<'EOT' > ds_config_zero3.json +{ + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, +"optimizer": { + "type": "AdamW", + "params": { + "lr": "auto", + "betas": "auto", + "eps": "auto", + "weight_decay": "auto" + } +}, + +"scheduler": { + "type": "WarmupLR", + "params": { + "warmup_min_lr": "auto", + "warmup_max_lr": "auto", + "warmup_num_steps": "auto" + } +}, \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..a2752aae17d94f6d86f35b9b32570e86626b36e4 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_4.txt @@ -0,0 +1,11 @@ +enable gradient checkpointing +try ZeRO-2 +try ZeRO-2 and offload the optimizer +try ZeRO-3 +try ZeRO-3 and offload parameters to the CPU +try ZeRO-3 and offload parameters and the optimizer to the CPU +try lowering various default values like a narrower search beam if you're using the [~GenerationMixin.generate] method +try mixed half-precision (fp16 on older GPU architectures and bf16 on Ampere) over full-precision weights +add more hardware if possible or enable Infinity to offload parameters and the optimizer to a NVMe +once you're not running out of memory, measure effective throughput and then try to increase the batch size as large as you can to maximize GPU efficiency +lastly, try to optimize your training setup by disabling some offload features or use a faster ZeRO stage and increasing/decreasing the batch size to find the best tradeoff between speed and memory usage \ No 
newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_40.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_40.txt new file mode 100644 index 0000000000000000000000000000000000000000..510371947c38a47a7c0e4bb71c13b2e5d68bf758 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_40.txt @@ -0,0 +1,30 @@ +"zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "cpu", + "pin_memory": true + }, + "offload_param": { + "device": "cpu", + "pin_memory": true + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1e9, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1e9, + "stage3_max_reuse_distance": 1e9, + "stage3_gather_16bit_weights_on_model_save": true +}, + +"gradient_accumulation_steps": "auto", +"gradient_clipping": "auto", +"steps_per_print": 2000, +"train_batch_size": "auto", +"train_micro_batch_size_per_gpu": "auto", +"wall_clock_breakdown": false + +} +EOT \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_41.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_41.txt new file mode 100644 index 0000000000000000000000000000000000000000..e10b1c014e44545dc242477a769a228cc35c9fbd --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_41.txt @@ -0,0 +1,16 @@ +} +EOT + +If the training script is in a file and not in a notebook cell, you can launch deepspeed normally from the shell in a notebook cell. For example, to launch run_translation.py: +py +!git clone https://github.com/huggingface/transformers +!cd transformers; deepspeed examples/pytorch/translation/run_translation.py +You could also use %%bash magic and write multi-line code to run the shell program, but you won't be able to view the logs until training is complete. With %%bash magic, you don't need to emulate a distributed environment. + +%%bash +git clone https://github.com/huggingface/transformers +cd transformers +deepspeed examples/pytorch/translation/run_translation.py + +Save model weights +DeepSpeed stores the main full precision fp32 weights in custom checkpoint optimizer files (the glob pattern looks like global_step*/*optim_states.pt) and are saved under the normal checkpoint. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_42.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_42.txt new file mode 100644 index 0000000000000000000000000000000000000000..1e1e074639f73b1d9eef2db2e8c5a9063b07139f --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_42.txt @@ -0,0 +1,7 @@ +A model trained with ZeRO-2 saves the pytorch_model.bin weights in fp16. To save the model weights in fp16 for a model trained with ZeRO-3, you need to set "stage3_gather_16bit_weights_on_model_save": true because the model weights are partitioned across multiple GPUs. Otherwise, the [Trainer] won't save the weights in fp16 and it won't create a pytorch_model.bin file. This is because DeepSpeed's state_dict contains a placeholder instead of the real weights and you won't be able to load them. 
+yaml +{ + "zero_optimization": { + "stage3_gather_16bit_weights_on_model_save": true + } +} \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_43.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_43.txt new file mode 100644 index 0000000000000000000000000000000000000000..fb1617f21f7fbe869cdb1eab6f7ebe86635e86be --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_43.txt @@ -0,0 +1,10 @@ +The full precision weights shouldn't be saved during training because it can require a lot of memory. It is usually best to save the fp32 weights offline after training is complete. But if you have a lot of free CPU memory, it is possible to save the fp32 weights during training. This section covers both online and offline approaches. +Online +You must have saved at least one checkpoint to load the latest checkpoint as shown in the following: + +from transformers.trainer_utils import get_last_checkpoint +from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint +checkpoint_dir = get_last_checkpoint(trainer.args.output_dir) +fp32_model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + +If you've enabled the --load_best_model_at_end parameter to track the best checkpoint in [TrainingArguments], you can finish training first and save the final model explicitly. Then you can reload it as shown below: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_44.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_44.txt new file mode 100644 index 0000000000000000000000000000000000000000..dae501b1ff5d1787a5073c67b8f0b747c3fbf5de --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_44.txt @@ -0,0 +1,13 @@ +from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint +checkpoint_dir = os.path.join(trainer.args.output_dir, "checkpoint-final") +trainer.deepspeed.save_checkpoint(checkpoint_dir) +fp32_model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + +Once load_state_dict_from_zero_checkpoint is run, the model is no longer usable in DeepSpeed in the context of the same application. You'll need to initialize the DeepSpeed engine again since model.load_state_dict(state_dict) removes all the DeepSpeed magic from it. Only use this at the very end of training. + +You can also extract and load the state_dict of the fp32 weights: + +from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint +state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu +model = model.cpu() +model.load_state_dict(state_dict) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_45.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_45.txt new file mode 100644 index 0000000000000000000000000000000000000000..80bfeb66c02dec1a2e035e1b7e290431a47c59ed --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_45.txt @@ -0,0 +1,3 @@ +Offline +DeepSpeed provides a zero_to_fp32.py script at the top-level of the checkpoint folder for extracting weights at any point. This is a standalone script and you don't need a configuration file or [Trainer]. 
+For example, if your checkpoint folder looked like this: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_46.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_46.txt new file mode 100644 index 0000000000000000000000000000000000000000..2c7f521a423b7623d5079e9250273b7b1e41607b --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_46.txt @@ -0,0 +1,16 @@ +$ ls -l output_dir/checkpoint-1/ +-rw-rw-r-- 1 stas stas 1.4K Mar 27 20:42 config.json +drwxrwxr-x 2 stas stas 4.0K Mar 25 19:52 global_step1/ +-rw-rw-r-- 1 stas stas 12 Mar 27 13:16 latest +-rw-rw-r-- 1 stas stas 827K Mar 27 20:42 optimizer.pt +-rw-rw-r-- 1 stas stas 231M Mar 27 20:42 pytorch_model.bin +-rw-rw-r-- 1 stas stas 623 Mar 27 20:42 scheduler.pt +-rw-rw-r-- 1 stas stas 1.8K Mar 27 20:42 special_tokens_map.json +-rw-rw-r-- 1 stas stas 774K Mar 27 20:42 spiece.model +-rw-rw-r-- 1 stas stas 1.9K Mar 27 20:42 tokenizer_config.json +-rw-rw-r-- 1 stas stas 339 Mar 27 20:42 trainer_state.json +-rw-rw-r-- 1 stas stas 2.3K Mar 27 20:42 training_args.bin +-rwxrw-r-- 1 stas stas 5.5K Mar 27 13:16 zero_to_fp32.py* +To reconstruct the fp32 weights from the DeepSpeed checkpoint (ZeRO-2 or ZeRO-3) subfolder global_step1, run the following command to create and consolidate the full fp32 weights from multiple GPUs into a single pytorch_model.bin file. The script automatically discovers the subfolder containing the checkpoint. +py +python zero_to_fp32.py . pytorch_model.bin \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_47.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_47.txt new file mode 100644 index 0000000000000000000000000000000000000000..b2bafc03e9eb25b16722fbc4ea7f80f1f3c57de9 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_47.txt @@ -0,0 +1,6 @@ +Run python zero_to_fp32.py -h for more usage details. The script requires 2x the general RAM of the final fp32 weights. + +ZeRO Inference +ZeRO Inference places the model weights in CPU or NVMe memory to avoid burdening the GPU which makes it possible to run inference with huge models on a GPU. Inference doesn't require any large additional amounts of memory for the optimizer states and gradients so you can fit much larger batches and/or sequence lengths on the same hardware. +ZeRO Inference shares the same configuration file as ZeRO-3, and ZeRO-2 and ZeRO-1 configs won't work because they don't provide any benefits for inference. +To run ZeRO Inference, pass your usual training arguments to the [TrainingArguments] class and add the --do_eval argument. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_48.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_48.txt new file mode 100644 index 0000000000000000000000000000000000000000..b4d6fc793b5138e0da8c36bdc53426fbb366a404 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_48.txt @@ -0,0 +1,7 @@ +deepspeed --num_gpus=2 your_program.py --do_eval --deepspeed ds_config.json +Non-Trainer DeepSpeed integration +DeepSpeed also works with Transformers without the [Trainer] class. This is handled by the [HfDeepSpeedConfig] which only takes care of gathering ZeRO-3 parameters and splitting a model across multiple GPUs when you call [~PreTrainedModel.from_pretrained]. + +If you want everything automatically taken care of for you, try using DeepSpeed with the [Trainer]! 
You'll need to follow the DeepSpeed documentation, and manually configure the parameter values in the config file (you can't use the "auto" value). + +To efficiently deploy ZeRO-3, you must instantiate the [HfDeepSpeedConfig] object before the model and keep that object alive: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_49.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_49.txt new file mode 100644 index 0000000000000000000000000000000000000000..20835496b9fea23add6669aeb6da101f380a9cc7 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_49.txt @@ -0,0 +1,20 @@ +from transformers.integrations import HfDeepSpeedConfig +from transformers import AutoModel +import deepspeed +ds_config = {} # deepspeed config object or path to the file +must run before instantiating the model to detect zero 3 +dschf = HfDeepSpeedConfig(ds_config) # keep this object alive +model = AutoModel.from_pretrained("openai-community/gpt2") +engine = deepspeed.initialize(model=model, config_params=ds_config, ) + +[HfDeepSpeedConfig] is not required for ZeRO-1 or ZeRO-2. + +from transformers.integrations import HfDeepSpeedConfig +from transformers import AutoModel, AutoConfig +import deepspeed +ds_config = {} # deepspeed config object or path to the file +must run before instantiating the model to detect zero 3 +dschf = HfDeepSpeedConfig(ds_config) # keep this object alive +config = AutoConfig.from_pretrained("openai-community/gpt2") +model = AutoModel.from_config(config) +engine = deepspeed.initialize(model=model, config_params=ds_config, ) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..c3f823eb7922f33a8ec50e52773ccce87811a69a --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_5.txt @@ -0,0 +1,10 @@ +DeepSpeed configuration file +DeepSpeed works with the [Trainer] class by way of a config file containing all the parameters for configuring how you want setup your training run. When you execute your training script, DeepSpeed logs the configuration it received from [Trainer] to the console so you can see exactly what configuration was used. + +Find a complete list of DeepSpeed configuration options on the DeepSpeed Configuration JSON reference. You can also find more practical examples of various DeepSpeed configuration examples on the DeepSpeedExamples repository or the main DeepSpeed repository. To quickly find specific examples, you can: +```bash +git clone https://github.com/microsoft/DeepSpeedExamples +cd DeepSpeedExamples +find . -name '*json' +find examples with the Lamb optimizer +grep -i Lamb $(find . -name '*json') \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_50.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_50.txt new file mode 100644 index 0000000000000000000000000000000000000000..301a58a370da4a400709d23f4e49db7b5f7ec4b9 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_50.txt @@ -0,0 +1,12 @@ +Non-Trainer ZeRO Inference +To run ZeRO Inference without the [Trainer] in cases where you can’t fit a model onto a single GPU, try using additional GPUs or/and offloading to CPU memory. The important nuance to understand here is that the way ZeRO is designed, you can process different inputs on different GPUs in parallel. 
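As a toy illustration of that idea (hypothetical prompts; the complete inference script later in this section does the real work with a DeepSpeed engine), each rank simply selects its own input:
```py
# Toy sketch only: the rank-to-input mapping behind ZeRO inference.
# LOCAL_RANK and WORLD_SIZE are set by the deepspeed/torchrun launcher; the defaults
# below let the snippet run standalone as a single process.
import os

rank = int(os.getenv("LOCAL_RANK", "0"))
world_size = int(os.getenv("WORLD_SIZE", "1"))
prompts = [
    "Is this review positive or negative? Review: this is the best cast iron skillet you will ever buy",
    "Is this review positive or negative? Review: this is the worst restaurant ever",
]
my_prompt = prompts[rank % len(prompts)]
print(f"rank {rank}/{world_size} would handle: {my_prompt!r}")
```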
+Make sure to: + +disable CPU offload if you have enough GPU memory (since it slows things down). +enable bf16 if you have an Ampere or newer GPU to make things faster. If you don’t have one of these GPUs, you may enable fp16 as long as you don’t use a model pretrained in bf16 (T5 models) because it may lead to an overflow error. + +Take a look at the following script to get a better idea of how to run ZeRO Inference without the [Trainer] on a model that won't fit on a single GPU. + +!/usr/bin/env python +This script demonstrates how to use Deepspeed ZeRO in an inference mode when one can't fit a model +into a single GPU \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_51.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_51.txt new file mode 100644 index 0000000000000000000000000000000000000000..7fb4ee6a4a22d2858075b662c19aa51a9e77a3d7 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_51.txt @@ -0,0 +1,22 @@ +1. Use 1 GPU with CPU offload +2. Or use multiple GPUs instead + +First you need to install deepspeed: pip install deepspeed + +Here we use a 3B "bigscience/T0_3B" model which needs about 15GB GPU RAM - so 1 largish or 2 +small GPUs can handle it. or 1 small GPU and a lot of CPU memory. + +To use a larger model like "bigscience/T0" which needs about 50GB, unless you have an 80GB GPU - +you will need 2-4 gpus. And then you can adapt the script to handle more gpus if you want to +process multiple inputs at once. + +The provided deepspeed config also activates CPU memory offloading, so chances are that if you +have a lot of available CPU memory and you don't mind a slowdown you should be able to load a +model that doesn't normally fit into a single GPU. If you have enough GPU memory the program will +run faster if you don't want offload to CPU - so disable that section then. + +To deploy on 1 gpu: + +deepspeed --num_gpus 1 t0.py +or: +python -m torch.distributed.run --nproc_per_node=1 t0.py \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_52.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_52.txt new file mode 100644 index 0000000000000000000000000000000000000000..f4a70bd04bd0a1c3a32317742aea46b451317b44 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_52.txt @@ -0,0 +1,29 @@ +deepspeed --num_gpus 1 t0.py +or: +python -m torch.distributed.run --nproc_per_node=1 t0.py + +To deploy on 2 gpus: + +deepspeed --num_gpus 2 t0.py +or: +python -m torch.distributed.run --nproc_per_node=2 t0.py +from transformers import AutoTokenizer, AutoConfig, AutoModelForSeq2SeqLM +from transformers.integrations import HfDeepSpeedConfig +import deepspeed +import os +import torch +os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warnings about parallelism in tokenizers +distributed setup +local_rank = int(os.getenv("LOCAL_RANK", "0")) +world_size = int(os.getenv("WORLD_SIZE", "1")) +torch.cuda.set_device(local_rank) +deepspeed.init_distributed() +model_name = "bigscience/T0_3B" +config = AutoConfig.from_pretrained(model_name) +model_hidden_size = config.d_model +batch size has to be divisible by world_size, but can be bigger than world_size +train_batch_size = 1 * world_size +ds_config notes + +- enable bf16 if you use Ampere or higher GPU - this will run in mixed precision and will be +faster. 
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_53.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_53.txt new file mode 100644 index 0000000000000000000000000000000000000000..410463349fa9707c3871748db465387e40e1e43f --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_53.txt @@ -0,0 +1,11 @@ +- enable bf16 if you use Ampere or higher GPU - this will run in mixed precision and will be +faster. + +- for older GPUs you can enable fp16, but it'll only work for non-bf16 pretrained models - e.g. +all official t5 models are bf16-pretrained + +- set offload_param.device to "none" or completely remove the offload_param section if you don't +- want CPU offload + +- if using offload_param you can manually finetune stage3_param_persistence_threshold to control +- which params should remain on gpus - the larger the value the smaller the offload size \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_54.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_54.txt new file mode 100644 index 0000000000000000000000000000000000000000..45c4dcc1f4126311052b72798ff94aaffe3fe7c0 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_54.txt @@ -0,0 +1,31 @@ +For in-depth info on Deepspeed config see +https://huggingface.co/docs/transformers/main/main_classes/deepspeed +keeping the same format as json for consistency, except it uses lower case for true/false +fmt: off +ds_config = { + "fp16": { + "enabled": False + }, + "bf16": { + "enabled": False + }, + "zero_optimization": { + "stage": 3, + "offload_param": { + "device": "cpu", + "pin_memory": True + }, + "overlap_comm": True, + "contiguous_gradients": True, + "reduce_bucket_size": model_hidden_size * model_hidden_size, + "stage3_prefetch_bucket_size": 0.9 * model_hidden_size * model_hidden_size, + "stage3_param_persistence_threshold": 10 * model_hidden_size + }, + "steps_per_print": 2000, + "train_batch_size": train_batch_size, + "train_micro_batch_size_per_gpu": 1, + "wall_clock_breakdown": False +} +fmt: on +next line instructs transformers to partition the model directly over multiple gpus using +deepspeed.zero.Init when model's from_pretrained method is called. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_55.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_55.txt new file mode 100644 index 0000000000000000000000000000000000000000..44220c70529453519bbcd6a3485fd582222700f3 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_55.txt @@ -0,0 +1 @@ +it has to be run before loading the model AutoModelForSeq2SeqLM.from_pretrained(model_name) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_56.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_56.txt new file mode 100644 index 0000000000000000000000000000000000000000..57c901ec5d4e85baa02a124a2e721ba24f60b38b --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_56.txt @@ -0,0 +1,23 @@ +otherwise the model will first be loaded normally and only partitioned at forward time which is +less efficient and when there is little CPU RAM may fail +dschf = HfDeepSpeedConfig(ds_config) # keep this object alive +now a model can be loaded. 
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name) +initialise Deepspeed ZeRO and store only the engine object +ds_engine = deepspeed.initialize(model=model, config_params=ds_config)[0] +ds_engine.module.eval() # inference +Deepspeed ZeRO can process unrelated inputs on each GPU. So for 2 gpus you process 2 inputs at once. +If you use more GPUs adjust for more. +And of course if you have just one input to process you then need to pass the same string to both gpus +If you use only one GPU, then you will have only rank 0. +rank = torch.distributed.get_rank() +if rank == 0: + text_in = "Is this review positive or negative? Review: this is the best cast iron skillet you will ever buy" +elif rank == 1: + text_in = "Is this review positive or negative? Review: this is the worst restaurant ever" +tokenizer = AutoTokenizer.from_pretrained(model_name) +inputs = tokenizer.encode(text_in, return_tensors="pt").to(device=local_rank) +with torch.no_grad(): + outputs = ds_engine.module.generate(inputs, synced_gpus=True) +text_out = tokenizer.decode(outputs[0], skip_special_tokens=True) +print(f"rank{rank}:\n in={text_in}\n out={text_out}") \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_57.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_57.txt new file mode 100644 index 0000000000000000000000000000000000000000..be1b5e8c07f70d8a33015ca8fba5582f461f77b2 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_57.txt @@ -0,0 +1 @@ +Save the script as t0.py and launch it: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_58.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_58.txt new file mode 100644 index 0000000000000000000000000000000000000000..934c3566574fcb4584c2dc103a9bb5f49a74840b --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_58.txt @@ -0,0 +1,14 @@ +$ deepspeed --num_gpus 2 t0.py +rank0: + in=Is this review positive or negative? Review: this is the best cast iron skillet you will ever buy + out=Positive +rank1: + in=Is this review positive or negative? Review: this is the worst restaurant ever + out=negative +This is a very basic example and you'll want to adapt it to your use case. +Generate +Using multiple GPUs with ZeRO-3 for generation requires synchronizing the GPUs by setting synced_gpus=True in the [~GenerationMixin.generate] method. Otherwise, if one GPU finishes generating before another one, the whole system hangs because the remaining GPUs haven't received the weight shard from the GPU that finished first. +For Transformers>=4.28, synced_gpus is automatically set to True if multiple GPUs are detected during generation. +Troubleshoot +When you encounter an issue, you should consider whether DeepSpeed is the cause of the problem because often it isn't (unless it's super obvious and you can see DeepSpeed modules in the exception)! The first step should be to retry your setup without DeepSpeed, and if the problem persists, then you can report the issue. If the issue is a core DeepSpeed problem and unrelated to the Transformers integration, open an Issue on the DeepSpeed repository.
+For issues related to the Transformers integration, please provide the following information: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_59.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_59.txt new file mode 100644 index 0000000000000000000000000000000000000000..48a2a5ce4d18d196e3b6094111a7fb82b12ff0fe --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_59.txt @@ -0,0 +1,13 @@ +the full DeepSpeed config file + +the command line arguments of the [Trainer], or [TrainingArguments] arguments if you're scripting the [Trainer] setup yourself (don't dump the [TrainingArguments] which has dozens of irrelevant entries) + +the outputs of: + +python -c 'import torch; print(f"torch: {torch.__version__}")' +python -c 'import transformers; print(f"transformers: {transformers.__version__}")' +python -c 'import deepspeed; print(f"deepspeed: {deepspeed.__version__}")' + +a link to a Google Colab notebook to reproduce the issue + +if impossible, a standard and non-custom dataset we can use and also try to use an existing example to reproduce the issue with \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..0af6b80ab2dccca08773d2c38d01a33bdca4556a --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_6.txt @@ -0,0 +1,14 @@ +The DeepSpeed configuration file is passed as a path to a JSON file if you're training from the command line interface or as a nested dict object if you're using the [Trainer] in a notebook setting. + +py +TrainingArguments(, deepspeed="path/to/deepspeed_config.json") + +py +ds_config_dict = dict(scheduler=scheduler_params, optimizer=optimizer_params) +args = TrainingArguments(, deepspeed=ds_config_dict) +trainer = Trainer(model, args, ) + +DeepSpeed and Trainer parameters +There are three types of configuration parameters: + +Some of the configuration parameters are shared by [Trainer] and DeepSpeed, and it can be difficult to identify errors when there are conflicting definitions. To make it easier, these shared configuration parameters are configured from the [Trainer] command line arguments. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_60.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_60.txt new file mode 100644 index 0000000000000000000000000000000000000000..5f66db106aa5f83a396f2f7e860ce755c8eb363d --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_60.txt @@ -0,0 +1,19 @@ +The following sections provide a guide for resolving two of the most common issues. +DeepSpeed process killed at startup +When the DeepSpeed process is killed during launch without a traceback, that usually means the program tried to allocate more CPU memory than your system has or your process tried to allocate more CPU memory than allowed leading the OS kernel to terminate the process. In this case, check whether your configuration file has either offload_optimizer, offload_param or both configured to offload to the CPU. +If you have NVMe and ZeRO-3 setup, experiment with offloading to the NVMe (estimate the memory requirements for your model). +NaN loss +NaN loss often occurs when a model is pretrained in bf16 and then you try to use it with fp16 (especially relevant for TPU trained models). 
To resolve this, use fp32 or bf16 if your hardware supports it (TPU, Ampere GPUs or newer). +The other issue may be related to using fp16. For example, if this is your fp16 configuration: +yaml +{ + "fp16": { + "enabled": "auto", + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + } +} +You might see the following OVERFLOW! messages in the logs: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_61.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_61.txt new file mode 100644 index 0000000000000000000000000000000000000000..a875b328b2ad2aba55156f57ef0bd01c84363f72 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_deepspeed.txt_chunk_61.txt @@ -0,0 +1,18 @@ +0%| | 0/189 [00:0015 billion parameter model. +While we see very little degradation in accuracy for our model here, 4-bit quantization can in practice often lead to different results compared to 8-bit quantization or full bfloat16 inference. It is up to the user to try it out. +Also note that inference here was again a bit slower compared to 8-bit quantization which is due to the more aggressive quantization method used for 4-bit quantization leading to \( \text{quantize} \) and \( \text{dequantize} \) taking longer during inference. +python +del model +del pipe +python +flush() +Overall, we saw that running OctoCoder in 8-bit precision reduced the required GPU VRAM from 32G GPU VRAM to only 15GB and running the model in 4-bit precision further reduces the required GPU VRAM to just a bit over 9GB. +4-bit quantization allows the model to be run on GPUs such as RTX3090, V100, and T4 which are quite accessible for most people. +For more information on quantization and to see how one can quantize models to require even less GPU VRAM memory than 4-bit, we recommend looking into the AutoGPTQ implementation. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_16.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_16.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ea57f221dbf074a7a4ceab1ed60171e32c3081e --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_16.txt @@ -0,0 +1 @@ +As a conclusion, it is important to remember that model quantization trades improved memory efficiency against accuracy and in some cases inference time. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_17.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_17.txt new file mode 100644 index 0000000000000000000000000000000000000000..f5a2595db5b6103ed8bc395121b22df47aaff9d7 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_17.txt @@ -0,0 +1,21 @@ +If GPU memory is not a constraint for your use case, there is often no need to look into quantization. However many GPUs simply can't run LLMs without quantization methods and in this case, 4-bit and 8-bit quantization schemes are extremely useful tools. +For more in-detail usage information, we strongly recommend taking a look at the Transformers Quantization Docs. +Next, let's look into how we can improve computational and memory efficiency by using better algorithms and an improved model architecture. +2. 
Flash Attention +Today's top-performing LLMs share more or less the same fundamental architecture that consists of feed-forward layers, activation layers, layer normalization layers, and most crucially, self-attention layers. +Self-attention layers are central to Large Language Models (LLMs) in that they enable the model to understand the contextual relationships between input tokens. +However, the peak GPU memory consumption for self-attention layers grows quadratically in both compute and memory complexity with the number of input tokens (also called sequence length), which we denote in the following by \( N \). +While this is not really noticeable for shorter input sequences (of up to 1000 input tokens), it becomes a serious problem for longer input sequences (around 16000 input tokens). +Let's take a closer look. The formula to compute the output \( \mathbf{O} \) of a self-attention layer for an input \( \mathbf{X} \) of length \( N \) is: +$$ \textbf{O} = \text{Attn}(\mathbf{X}) = \mathbf{V} \times \text{Softmax}(\mathbf{QK}^T) \text{ with } \mathbf{Q} = \mathbf{W}_q \mathbf{X}, \mathbf{V} = \mathbf{W}_v \mathbf{X}, \mathbf{K} = \mathbf{W}_k \mathbf{X} $$ +\( \mathbf{X} = (\mathbf{x}_1, \ldots, \mathbf{x}_N) \) is thereby the input sequence to the attention layer. The projections \( \mathbf{Q} \) and \( \mathbf{K} \) will each consist of \( N \) vectors, resulting in the \( \mathbf{QK}^T \) matrix being of size \( N^2 \). +LLMs usually have multiple attention heads, thus doing multiple self-attention computations in parallel. +Assuming the LLM has 40 attention heads and runs in bfloat16 precision, we can calculate the memory requirement to store the \( \mathbf{QK^T} \) matrices to be \( 40 * 2 * N^2 \) bytes. For \( N=1000 \) only around 50 MB of VRAM are needed; however, for \( N=16000 \) we would need 19 GB of VRAM, and for \( N=100,000 \) we would need almost 1TB just to store the \( \mathbf{QK}^T \) matrices. +Long story short, the default self-attention algorithm quickly becomes prohibitively memory-expensive for large input contexts. +As LLMs improve in text comprehension and generation, they are applied to increasingly complex tasks. While models once handled the translation or summarization of a few sentences, they now manage entire pages, demanding the capability to process extensive input lengths. +How can we get rid of the exorbitant memory requirements for large input lengths? We need a new way to compute the self-attention mechanism that gets rid of the \( QK^T \) matrix. Tri Dao et al. developed exactly such a new algorithm and called it Flash Attention. +In a nutshell, Flash Attention breaks the \( \mathbf{V} \times \text{Softmax}(\mathbf{QK}^T) \) computation apart and instead computes smaller chunks of the output by iterating over multiple softmax computation steps: +$$ \textbf{O}_i \leftarrow s^a_{ij} * \textbf{O}_i + s^b_{ij} * \mathbf{V}_{j} \times \text{Softmax}(\mathbf{QK}^T_{i,j}) \text{ for multiple } i, j \text{ iterations} $$ +with \( s^a_{ij} \) and \( s^b_{ij} \) being softmax normalization statistics that need to be recomputed for every \( i \) and \( j \). +Please note that the full Flash Attention algorithm is a bit more complex and is greatly simplified here, as going into too much depth is out of scope for this guide. The reader is invited to take a look at the well-written Flash Attention paper for more details.
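To make the memory numbers above concrete, here is a small sketch of the same back-of-the-envelope computation; no model is loaded, and the 40 heads and 2 bytes per bfloat16 value simply follow the assumptions stated above.
python
def qkt_memory_gb(seq_len, num_heads=40, bytes_per_value=2):
    # num_heads * bytes_per_value * N^2 bytes to materialize the QK^T score matrices of one layer
    return num_heads * bytes_per_value * seq_len**2 / 1024**3

for n in (1_000, 16_000, 100_000):
    print(f"N={n}: {qkt_memory_gb(n):.2f} GB")
# roughly 0.07 GB, 19 GB and 745 GB, the same order of magnitude as the figures quoted above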
+The main takeaway here is: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_18.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_18.txt new file mode 100644 index 0000000000000000000000000000000000000000..8748b65504e2307b909adebf1dc7cffe3968fcf7 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_18.txt @@ -0,0 +1,5 @@ +By keeping track of softmax normalization statistics and by using some smart mathematics, Flash Attention gives numerical identical outputs compared to the default self-attention layer at a memory cost that only increases linearly with \( N \) . + +Looking at the formula, one would intuitively say that Flash Attention must be much slower compared to the default self-attention formula as more computation needs to be done. Indeed Flash Attention requires more FLOPs compared to normal attention as the softmax normalization statistics have to constantly be recomputed (see paper for more details if interested) + +However, Flash Attention is much faster in inference compared to default attention which comes from its ability to significantly reduce the demands on the slower, high-bandwidth memory of the GPU (VRAM), focusing instead on the faster on-chip memory (SRAM). \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_19.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_19.txt new file mode 100644 index 0000000000000000000000000000000000000000..969c50099a3ec1e30bed601ee56b0ad9d6b27240 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_19.txt @@ -0,0 +1,13 @@ +Essentially, Flash Attention makes sure that all intermediate write and read operations can be done using the fast on-chip SRAM memory instead of having to access the slower VRAM memory to compute the output vector \( \mathbf{O} \) . +In practice, there is currently absolutely no reason to not use Flash Attention if available. The algorithm gives mathematically the same outputs, and is both faster and more memory-efficient. +Let's look at a practical example. +Our OctoCoder model now gets a significantly longer input prompt which includes a so-called system prompt. System prompts are used to steer the LLM into a better assistant that is tailored to the users' task. +In the following, we use a system prompt that will make OctoCoder a better coding assistant. +thon +system_prompt = """Below are a series of dialogues between various people and an AI technical assistant. +The assistant tries to be helpful, polite, honest, sophisticated, emotionally aware, and humble but knowledgeable. +The assistant is happy to help with code questions and will do their best to understand exactly what is needed. +It also tries to avoid giving false or misleading information, and it caveats when it isn't entirely sure about the right answer. +That said, the assistant is practical really does its best, and doesn't let caution get too much in the way of being useful. +The Starcoder models are a series of 15.5B parameter models trained on 80+ programming languages from The Stack (v1.2) (excluding opt-out requests). +The model uses Multi Query Attention, was trained using the Fill-in-the-Middle objective, and with 8,192 tokens context window for a trillion tokens of heavily deduplicated data. 
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..4ce70df4d0238d7f0b6db2c47d06341200a6a847 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_2.txt @@ -0,0 +1,4 @@ +Throughout this guide, we will offer an analysis of auto-regressive generation from a tensor's perspective. We delve into the pros and cons of adopting lower precision, provide a comprehensive exploration of the latest attention algorithms, and discuss improved LLM architectures. While doing so, we run practical examples showcasing each of the feature improvements. +1. Lower Precision +Memory requirements of LLMs can be best understood by seeing the LLM as a set of weight matrices and vectors and the text inputs as a sequence of vectors. In the following, the definition weights will be used to signify all model weight matrices and vectors. +At the time of writing this guide, LLMs consist of at least a couple billion parameters. Each parameter thereby is made of a decimal number, e.g. 4.5689 which is usually stored in either float32, bfloat16, or float16 format. This allows us to easily compute the memory requirement to load the LLM into memory: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_20.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_20.txt new file mode 100644 index 0000000000000000000000000000000000000000..7736e2f1a78ff20d646f633e5ea26a3162357b9c --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_20.txt @@ -0,0 +1,25 @@ +Question: Write a function that takes two lists and returns a list that has alternating elements from each input list. +Answer: Sure. Here is a function that does that. +def alternating(list1, list2): + results = [] + for i in range(len(list1)): + results.append(list1[i]) + results.append(list2[i]) + return results +Question: Can you write some test cases for this function? +Answer: Sure, here are some tests. +assert alternating([10, 20, 30], [1, 2, 3]) == [10, 1, 20, 2, 30, 3] +assert alternating([True, False], [4, 5]) == [True, 4, False, 5] +assert alternating([], []) == [] +Question: Modify the function so that it returns all input elements when the lists have uneven length. The elements from the longer list should be at the end. +Answer: Here is the modified function. +def alternating(list1, list2): + results = [] + for i in range(min(len(list1), len(list2))): + results.append(list1[i]) + results.append(list2[i]) + if len(list1) > len(list2): + results.extend(list1[i+1:]) + else: + results.extend(list2[i+1:]) + return results \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_21.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_21.txt new file mode 100644 index 0000000000000000000000000000000000000000..764d4e2849c891ca7cc8feb443e8c582ceb30344 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_21.txt @@ -0,0 +1,21 @@ +""" +`` +For demonstration purposes, we duplicate the system prompt by ten so that the input length is long enough to observe Flash Attention's memory savings. 
+We append the original text prompt"Question: Please write a function in Python that transforms bytes to Giga bytes.\n\nAnswer: Here"` +python +long_prompt = 10 * system_prompt + prompt +We instantiate our model again in bfloat16 precision. +thon +model = AutoModelForCausalLM.from_pretrained("bigcode/octocoder", torch_dtype=torch.bfloat16, device_map="auto") +tokenizer = AutoTokenizer.from_pretrained("bigcode/octocoder") +pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) + +Let's now run the model just like before without Flash Attention and measure the peak GPU memory requirement and inference time. +thon +import time +start_time = time.time() +result = pipe(long_prompt, max_new_tokens=60)[0]["generated_text"][len(long_prompt):] +print(f"Generated in {time.time() - start_time} seconds.") +result + +Output: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_22.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_22.txt new file mode 100644 index 0000000000000000000000000000000000000000..3c0c9f8eaa1f9f1c107388763e2fa1068600e1ed --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_22.txt @@ -0,0 +1,11 @@ +Output: + +Generated in 10.96854019165039 seconds. +Sure. Here is a function that does that.\n\ndef bytes_to_giga(bytes):\n return bytes / 1024 / 1024 / 1024\n\nAnswer: Sure. Here is a function that does that.\n\ndef +` +We're getting the same output as before, however this time, the model repeats the answer multiple times until it's 60 tokens cut-off. This is not surprising as we've repeated the system prompt ten times for demonstration purposes and thus cued the model to repeat itself. +Note that the system prompt should not be repeated ten times in real-world applications - one time is enough! +Let's measure the peak GPU memory requirement. +python +bytes_to_giga_bytes(torch.cuda.max_memory_allocated()) +Output: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_23.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_23.txt new file mode 100644 index 0000000000000000000000000000000000000000..b10c6d3321b138691545d8cef46ec8c13d176cda --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_23.txt @@ -0,0 +1,16 @@ +37.668193340301514 +As we can see the peak GPU memory requirement is now significantly higher than in the beginning, which is largely due to the longer input sequence. Also the generation takes a little over a minute now. +We call flush() to free GPU memory for our next experiment. +python +flush() +For comparison, let's run the same function, but enable Flash Attention instead. +To do so, we convert the model to BetterTransformer and by doing so enabling PyTorch's SDPA self-attention which in turn is able to use Flash Attention. +python +model.to_bettertransformer() +Now we run the exact same code snippet as before and under the hood Transformers will make use of Flash Attention. 
+ +start_time = time.time() +with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_math=False, enable_mem_efficient=False): + result = pipe(long_prompt, max_new_tokens=60)[0]["generated_text"][len(long_prompt):] +print(f"Generated in {time.time() - start_time} seconds.") +result \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_24.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_24.txt new file mode 100644 index 0000000000000000000000000000000000000000..83e7183a5731b543d1788f3d33815476e119637e --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_24.txt @@ -0,0 +1,16 @@ +Output: +Generated in 3.0211617946624756 seconds. + Sure. Here is a function that does that.\n\ndef bytes_to_giga(bytes):\n return bytes / 1024 / 1024 / 1024\n\nAnswer: Sure. Here is a function that does that.\n\ndef +We're getting the exact same result as before, but can observe a very significant speed-up thanks to Flash Attention. +Let's measure the memory consumption one last time. +python +bytes_to_giga_bytes(torch.cuda.max_memory_allocated()) +Output: +32.617331981658936 +And we're almost back to our original 29GB peak GPU memory from the beginning. +We can observe that we only use roughly 100MB more GPU memory when passing a very long input sequence with Flash Attention compared to passing a short input sequence as done in the beginning. +py +flush() +For more information on how to use Flash Attention, please have a look at this doc page. +3. Architectural Innovations +So far we have looked into improving computational and memory efficiency by: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_25.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_25.txt new file mode 100644 index 0000000000000000000000000000000000000000..204f99b515d19b3287e0c84e87a46d86d2d9832d --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_25.txt @@ -0,0 +1,13 @@ +Casting the weights to a lower precision format +Replacing the self-attention algorithm with a more memory- and compute efficient version + +Let's now look into how we can change the architecture of an LLM so that it is most effective and efficient for task that require long text inputs, e.g.: +- Retrieval augmented Questions Answering, +- Summarization, +- Chat +Note that chat not only requires the LLM to handle long text inputs, but it also necessitates that the LLM is able to efficiently handle the back-and-forth dialogue between user and assistant (such as ChatGPT). +Once trained, the fundamental LLM architecture is difficult to change, so it is important to make considerations about the LLM's tasks beforehand and accordingly optimize the model's architecture. +There are two important components of the model architecture that quickly become memory and/or performance bottlenecks for large input sequences. 
+ +The positional embeddings +The key-value cache \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_26.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_26.txt new file mode 100644 index 0000000000000000000000000000000000000000..2ec60e22b658c65e58430554084ad33cfc448288 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_26.txt @@ -0,0 +1,7 @@ +The positional embeddings +The key-value cache + +Let's go over each component in more detail +3.1 Improving positional embeddings of LLMs +Self-attention puts each token in relation to each other's tokens. +As an example, the \( \text{Softmax}(\mathbf{QK}^T) \) matrix of the text input sequence "Hello", "I", "love", "you" could look as follows: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_27.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_27.txt new file mode 100644 index 0000000000000000000000000000000000000000..f7be27d403c6876cd6ce76e38c23d710744d5373 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_27.txt @@ -0,0 +1,12 @@ +Each word token is given a probability mass at which it attends all other word tokens and, therefore is put into relation with all other word tokens. E.g. the word "love" attends to the word "Hello" with 5%, to "I" with 30%, and to itself with 65%. +A LLM based on self-attention, but without position embeddings would have great difficulties in understanding the positions of the text inputs to each other. +This is because the probability score computed by \( \mathbf{QK}^T \) relates each word token to each other word token in \( O(1) \) computations regardless of their relative positional distance to each other. +Therefore, for the LLM without position embeddings each token appears to have the same distance to all other tokens, e.g. differentiating between "Hello I love you" and "You love I hello" would be very challenging. +For the LLM to understand sentence order, an additional cue is needed and is usually applied in the form of positional encodings (or also called positional embeddings). +Positional encodings, encode the position of each token into a numerical presentation that the LLM can leverage to better understand sentence order. +The authors of the Attention Is All You Need paper introduced sinusoidal positional embeddings \( \mathbf{P} = \mathbf{p}_1, \ldots, \mathbf{p}_N \) . +where each vector \( \mathbf{p}_i \) is computed as a sinusoidal function of its position \( i \) . +The positional encodings are then simply added to the input sequence vectors \( \mathbf{\hat{X}} = \mathbf{\hat{x}}_1, \ldots, \mathbf{\hat{x}}_N \) = \( \mathbf{x}_1 + \mathbf{p}_1, \ldots, \mathbf{x}_N + \mathbf{p}_N \) thereby cueing the model to better learn sentence order. +Instead of using fixed position embeddings, others (such as Devlin et al.) used learned positional encodings for which the positional embeddings +\( \mathbf{P} \) are learned during training. 
+Sinusoidal and learned position embeddings used to be the predominant methods to encode sentence order into LLMs, but a couple of problems related to these positional encodings were found: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_28.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_28.txt new file mode 100644 index 0000000000000000000000000000000000000000..ad884d314e861ac23993417232cd347593e9d2c1 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_28.txt @@ -0,0 +1,7 @@ +Sinusoidal and learned position embeddings are both absolute positional embeddings, i.e. encoding a unique embedding for each position id: \( 0, \ldots, N \). As shown by Huang et al. and Su et al., absolute positional embeddings lead to poor LLM performance for long text inputs. For long text inputs, it is advantageous if the model learns the relative positional distance input tokens have to each other instead of their absolute position. +When using learned position embeddings, the LLM has to be trained on a fixed input length \( N \), which makes it difficult to extrapolate to an input length longer than what it was trained on. + +Recently, relative positional embeddings that can tackle the above-mentioned problems have become more popular, most notably: + +Rotary Position Embedding (RoPE) +ALiBi \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_29.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_29.txt new file mode 100644 index 0000000000000000000000000000000000000000..de563bc7f84ce8ea2fbc741d96b254aae21c40a0 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_29.txt @@ -0,0 +1,7 @@ +Rotary Position Embedding (RoPE) +ALiBi + +Both RoPE and ALiBi argue that it's best to cue the LLM about sentence order directly in the self-attention algorithm as it's there that word tokens are put into relation with each other. More specifically, sentence order should be cued by modifying the \( \mathbf{QK}^T \) computation. +Without going into too many details, RoPE notes that positional information can be encoded into query-key pairs, e.g. \( \mathbf{q}_i \) and \( \mathbf{x}_j \) by rotating each vector by an angle \( \theta * i \) and \( \theta * j \) respectively, with \( i, j \) describing each vector's sentence position: +$$ \mathbf{\hat{q}}_i^T \mathbf{\hat{x}}_j = \mathbf{q}_i^T \mathbf{R}_{\theta, i - j} \mathbf{x}_j. $$ +\( \mathbf{R}_{\theta, i - j} \) thereby represents a rotational matrix. \( \theta \) is not learned during training, but instead set to a pre-defined value that depends on the maximum input sequence length during training. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..bec99f066ace75e232ae5d4d6a5eaa7e0e4f86c2 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_3.txt @@ -0,0 +1,8 @@ +Loading the weights of a model having X billion parameters requires roughly 4 * X GB of VRAM in float32 precision + +Nowadays, models are however rarely trained in full float32 precision, but usually in bfloat16 precision or less frequently in float16 precision.
Therefore the rule of thumb becomes: + +Loading the weights of a model having X billion parameters requires roughly 2 * X GB of VRAM in bfloat16/float16 precision + +For shorter text inputs (less than 1024 tokens), the memory requirement for inference is very much dominated by the memory requirement to load the weights. Therefore, for now, let's assume that the memory requirement for inference is equal to the memory requirement to load the model into the GPU VRAM. +To give some examples of how much VRAM it roughly takes to load a model in bfloat16: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_30.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_30.txt new file mode 100644 index 0000000000000000000000000000000000000000..e3b4f72e4222e13f8fcd4f38815c3ff372e1773b --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_30.txt @@ -0,0 +1,15 @@ +By doing so, the propability score between \( \mathbf{q}_i \) and \( \mathbf{q}_j \) is only affected if \( i \ne j \) and solely depends on the relative distance \( i - j \) regardless of each vector's specific positions \( i \) and \( j \) . + +RoPE is used in multiple of today's most important LLMs, such as: + +Falcon +Llama +PaLM + +As an alternative, ALiBi proposes a much simpler relative position encoding scheme. The relative distance that input tokens have to each other is added as a negative integer scaled by a pre-defined value m to each query-key entry of the \( \mathbf{QK}^T \) matrix right before the softmax computation. + +As shown in the ALiBi paper, this simple relative positional encoding allows the model to retain a high performance even at very long text input sequences. +ALiBi is used in multiple of today's most important LLMs, such as: + +MPT +BLOOM \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_31.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_31.txt new file mode 100644 index 0000000000000000000000000000000000000000..9cf42f242c8f27fd9bd87e1be8daedd9b717c462 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_31.txt @@ -0,0 +1,6 @@ +MPT +BLOOM + +Both RoPE and ALiBi position encodings can extrapolate to input lengths not seen during training whereas it has been shown that extrapolation works much better out-of-the-box for ALiBi as compared to RoPE. +For ALiBi, one simply increases the values of the lower triangular position matrix to match the length of the input sequence. +For RoPE, keeping the same \( \theta \) that was used during training leads to poor results when passing text inputs much longer than those seen during training, c.f Press et al.. However, the community has found a couple of effective tricks that adapt \( \theta \), thereby allowing RoPE position embeddings to work well for extrapolated text input sequences (see here). 
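To illustrate the idea behind ALiBi's linear biases described above, here is a small, schematic sketch; it is a toy single-head example with made-up dimensions and slope, not the implementation used by MPT or BLOOM.
python
import torch

seq_len, head_dim, m = 6, 8, 0.5  # toy sizes; m plays the role of ALiBi's pre-defined slope
q = torch.randn(seq_len, head_dim)
k = torch.randn(seq_len, head_dim)

scores = q @ k.T  # QK^T attention scores for a single head

# ALiBi: subtract m * (i - j) from the score of query position i attending to key position j (j <= i)
positions = torch.arange(seq_len)
bias = -m * (positions[:, None] - positions[None, :]).clamp(min=0)
scores = scores + bias  # tokens further in the past receive increasingly negative scores

probs = torch.softmax(scores, dim=-1)  # a causal mask would additionally be applied in practice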
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_32.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_32.txt new file mode 100644 index 0000000000000000000000000000000000000000..088406832d43a8e6df8e6b9b2bbea1a44b42d3a4 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_32.txt @@ -0,0 +1,4 @@ +Both RoPE and ALiBi are relative positional embeddings that are not learned during training, but instead are based on the following intuitions: + - Positional cues about the text inputs should be given directly to the \( QK^T \) matrix of the self-attention layer + - The LLM should be incentivized to learn a constant relative distance positional encodings have to each other + - The further text input tokens are from each other, the lower the probability of their query-value probability. Both RoPE and ALiBi lower the query-key probability of tokens far away from each other. RoPE by decreasing their vector product by increasing the angle between the query-key vectors. ALiBi by adding large negative numbers to the vector product \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_33.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_33.txt new file mode 100644 index 0000000000000000000000000000000000000000..355bd52329d386e8d06aeb2bfd4882c7dceada19 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_33.txt @@ -0,0 +1,14 @@ +In conclusion, LLMs that are intended to be deployed in tasks that require handling large text inputs are better trained with relative positional embeddings, such as RoPE and ALiBi. Also note that even if an LLM with RoPE and ALiBi has been trained only on a fixed length of say \( N_1 = 2048 \) it can still be used in practice with text inputs much larger than \( N_1 \), like \( N_2 = 8192 > N_1 \) by extrapolating the positional embeddings. +3.2 The key-value cache +Auto-regressive text generation with LLMs works by iteratively putting in an input sequence, sampling the next token, appending the next token to the input sequence, and continuing to do so until the LLM produces a token that signifies that the generation has finished. +Please have a look at Transformer's Generate Text Tutorial to get a more visual explanation of how auto-regressive generation works. +Let's run a quick code snippet to show how auto-regressive works in practice. We will simply take the most likely next token via torch.argmax. 
+python +input_ids = tokenizer(prompt, return_tensors="pt")["input_ids"].to("cuda") +for _ in range(5): + next_logits = model(input_ids)["logits"][:, -1:] + next_token_id = torch.argmax(next_logits, dim=-1) + input_ids = torch.cat([input_ids, next_token_id], dim=-1) + print("shape of input_ids", input_ids.shape) +generated_text = tokenizer.batch_decode(input_ids[:, -5:]) +generated_text \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_34.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_34.txt new file mode 100644 index 0000000000000000000000000000000000000000..62ee4ce614728368266aa5ef2862eac7d96e5718 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_34.txt @@ -0,0 +1,25 @@ +Output: +shape of input_ids torch.Size([1, 21]) +shape of input_ids torch.Size([1, 22]) +shape of input_ids torch.Size([1, 23]) +shape of input_ids torch.Size([1, 24]) +shape of input_ids torch.Size([1, 25]) +[' Here is a Python function'] +As we can see, every iteration appends the just-sampled token to the text input tokens. +With very few exceptions, LLMs are trained using the causal language modeling objective and therefore mask the upper triangle of the attention score matrix - this is why in the two diagrams above the attention scores are left blank (i.e. they have 0 probability). For a quick recap on causal language modeling you can refer to the Illustrated Self Attention blog. +As a consequence, tokens never depend on future tokens; more specifically, the \( \mathbf{q}_i \) vector is never put in relation with any key-value vectors \( \mathbf{k}_j, \mathbf{v}_j \) if \( j > i \). Instead, \( \mathbf{q}_i \) only attends to the previous key-value vectors \( \mathbf{k}_{m < i}, \mathbf{v}_{m < i} \text{, for } m \in \{0, \ldots, i - 1\} \). In order to reduce unnecessary computation, one can therefore cache each layer's key-value vectors for all previous timesteps. +In the following, we will tell the LLM to make use of the key-value cache by retrieving and forwarding it for each forward pass. +In Transformers, we can retrieve the key-value cache by passing the use_cache flag to the forward call and can then pass it with the current token.
+python +past_key_values = None # past_key_values is the key-value cache +generated_tokens = [] +next_token_id = tokenizer(prompt, return_tensors="pt")["input_ids"].to("cuda") +for _ in range(5): + next_logits, past_key_values = model(next_token_id, past_key_values=past_key_values, use_cache=True).to_tuple() + next_logits = next_logits[:, -1:] + next_token_id = torch.argmax(next_logits, dim=-1) + print("shape of input_ids", next_token_id.shape) + print("length of key-value cache", len(past_key_values[0][0])) # past_key_values are of shape [num_layers, 0 for k, 1 for v, batch_size, length, hidden_dim] + generated_tokens.append(next_token_id.item()) +generated_text = tokenizer.batch_decode(generated_tokens) +generated_text \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_35.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_35.txt new file mode 100644 index 0000000000000000000000000000000000000000..4bfa83d1a811f7c5591f1d3cdec4faf9b157bcb8 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_35.txt @@ -0,0 +1,15 @@ +Output: +shape of input_ids torch.Size([1, 1]) +length of key-value cache 20 +shape of input_ids torch.Size([1, 1]) +length of key-value cache 21 +shape of input_ids torch.Size([1, 1]) +length of key-value cache 22 +shape of input_ids torch.Size([1, 1]) +length of key-value cache 23 +shape of input_ids torch.Size([1, 1]) +length of key-value cache 24 +[' Here', ' is', ' a', ' Python', ' function'] +As one can see, when using the key-value cache the text input tokens are not increased in length, but remain a single input vector. The length of the key-value cache on the other hand is increased by one at every decoding step. + +Making use of the key-value cache means that the \( \mathbf{QK}^T \) computation is essentially reduced to \( \mathbf{q}_c\mathbf{K}^T \) with \( \mathbf{q}_c \) being the query projection of the currently passed input token which is always just a single vector. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_36.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_36.txt new file mode 100644 index 0000000000000000000000000000000000000000..3864dd3164dc2ab914272e2f9e3b4e08bd8b7663 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_36.txt @@ -0,0 +1,7 @@ +Using the key-value cache has two advantages: +- Significant increase in computational efficiency as fewer computations are performed compared to computing the full \( \mathbf{QK}^T \) matrix. This leads to an increase in inference speed +- The maximum required memory is not increased quadratically with the number of generated tokens, but only increases linearly. + +One should always make use of the key-value cache as it leads to identical results and a significant speed-up for longer input sequences. Transformers has the key-value cache enabled by default when making use of the text pipeline or the generate method. + +Note that, despite our advice to use key-value caches, your LLM output may be slightly different when you use them. This is a property of the matrix multiplication kernels themselves -- you can read more about it here.
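To see the speed-up described above in practice, one could time generation with the cache explicitly disabled. This is a rough sketch: model, tokenizer and prompt are assumed to be the ones used earlier in this guide, and absolute timings will vary with hardware.
python
import time

inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

for use_cache in (True, False):
    start = time.time()
    _ = model.generate(**inputs, max_new_tokens=60, use_cache=use_cache)
    print(f"use_cache={use_cache}: {time.time() - start:.2f} seconds")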
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_37.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_37.txt new file mode 100644 index 0000000000000000000000000000000000000000..a2dff01fe1710c441edee25089ee37afee9d2c03 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_37.txt @@ -0,0 +1,29 @@ +3.2.1 Multi-round conversation +The key-value cache is especially useful for applications such as chat where multiple passes of auto-regressive decoding are required. Let's look at an example. +User: How many people live in France? +Assistant: Roughly 75 million people live in France +User: And how many are in Germany? +Assistant: Germany has ca. 81 million inhabitants +In this chat, the LLM runs auto-regressive decoding twice: + 1. The first time, the key-value cache is empty and the input prompt is "User: How many people live in France?" and the model auto-regressively generates the text "Roughly 75 million people live in France" while increasing the key-value cache at every decoding step. + 2. The second time the input prompt is "User: How many people live in France? \n Assistant: Roughly 75 million people live in France \n User: And how many in Germany?". Thanks to the cache, all key-value vectors for the first two sentences are already computed. Therefore the input prompt only consists of "User: And how many in Germany?". While processing the shortened input prompt, it's computed key-value vectors are concatenated to the key-value cache of the first decoding. The second Assistant's answer "Germany has ca. 81 million inhabitants" is then auto-regressively generated with the key-value cache consisting of encoded key-value vectors of "User: How many people live in France? \n Assistant: Roughly 75 million people live in France \n User: And how many are in Germany?". +Two things should be noted here: + 1. Keeping all the context is crucial for LLMs deployed in chat so that the LLM understands all the previous context of the conversation. E.g. for the example above the LLM needs to understand that the user refers to the population when asking "And how many are in Germany". + 2. The key-value cache is extremely useful for chat as it allows us to continuously grow the encoded chat history instead of having to re-encode the chat history again from scratch (as e.g. would be the case when using an encoder-decoder architecture). +In transformers, a generate call will return past_key_values when return_dict_in_generate=True is passed, in addition to the default use_cache=True. Note that it is not yet available through the pipeline interface. 
+thon +Generation as usual +prompt = system_prompt + "Question: Please write a function in Python that transforms bytes to Giga bytes.\n\nAnswer: Here" +model_inputs = tokenizer(prompt, return_tensors='pt') +generation_output = model.generate(**model_inputs, max_new_tokens=60, return_dict_in_generate=True) +decoded_output = tokenizer.batch_decode(generation_output.sequences)[0] +Piping the returned past_key_values to speed up the next conversation round +prompt = decoded_output + "\nQuestion: How can I modify the function above to return Mega bytes instead?\n\nAnswer: Here" +model_inputs = tokenizer(prompt, return_tensors='pt') +generation_output = model.generate( + **model_inputs, + past_key_values=generation_output.past_key_values, + max_new_tokens=60, + return_dict_in_generate=True +) +tokenizer.batch_decode(generation_output.sequences)[0][len(prompt):] \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_38.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_38.txt new file mode 100644 index 0000000000000000000000000000000000000000..6ae2abb6d51db245a789dca9328a92b115abc7bc --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_38.txt @@ -0,0 +1,6 @@ +Output: + + is a modified version of the function that returns Mega bytes instead. +def bytes_to_megabytes(bytes): + return bytes / 1024 / 1024 +Answer: The function takes a number of bytes as input and returns the number of \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_39.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_39.txt new file mode 100644 index 0000000000000000000000000000000000000000..52f12bb15023a5b63b5758790d6864efc81b94fd --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_39.txt @@ -0,0 +1,13 @@ +Great, no additional time is spent recomputing the same key and values for the attention layer! There is however one catch. While the required peak memory for the \( \mathbf{QK}^T \) matrix is significantly reduced, holding the key-value cache in memory can become very memory expensive for long input sequences or multi-turn chat. Remember that the key-value cache needs to store the key-value vectors for all previous input vectors \( \mathbf{x}_i \text{, for } i \in {1, \ldots, c - 1} \) for all self-attention layers and for all attention heads. +Let's compute the number of float values that need to be stored in the key-value cache for the LLM bigcode/octocoder that we used before. +The number of float values amounts to two times the sequence length times the number of attention heads times the attention head dimension and times the number of layers. +Computing this for our LLM at a hypothetical input sequence length of 16000 gives: +python +config = model.config +2 * 16_000 * config.n_layer * config.n_head * config.n_embd // config.n_head +Output: +7864320000 +Roughly 8 billion float values! Storing 8 billion float values in float16 precision requires around 15 GB of RAM which is circa half as much as the model weights themselves! +Researchers have proposed two methods that allow to significantly reduce the memory cost of storing the key-value cache, which are explored in the next subsections. +3.2.2 Multi-Query-Attention (MQA) +Multi-Query-Attention was proposed in Noam Shazeer's Fast Transformer Decoding: One Write-Head is All You Need paper. 
As the title says, Noam found out that instead of using n_head key-value projections weights, one can use a single head-value projection weight pair that is shared across all attention heads without that the model's performance significantly degrades. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..4e6b7ce7aae44b0e8f855fae0949042c4debb158 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_4.txt @@ -0,0 +1,6 @@ +GPT3 requires 2 * 175 GB = 350 GB VRAM +Bloom requires 2 * 176 GB = 352 GB VRAM +Llama-2-70b requires 2 * 70 GB = 140 GB VRAM +Falcon-40b requires 2 * 40 GB = 80 GB VRAM +MPT-30b requires 2 * 30 GB = 60 GB VRAM +bigcode/starcoder requires 2 * 15.5 = 31 GB VRAM \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_40.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_40.txt new file mode 100644 index 0000000000000000000000000000000000000000..ea57ade8b93978444d6e8c827b7f23663b4c986e --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_40.txt @@ -0,0 +1 @@ +By using a single head-value projection weight pair, the key value vectors \( \mathbf{k}_i, \mathbf{v}_i \) have to be identical across all attention heads which in turn means that we only need to store 1 key-value projection pair in the cache instead of n_head ones. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_41.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_41.txt new file mode 100644 index 0000000000000000000000000000000000000000..ea2bb1d4272334baade7892c1103bcca9a6b50a0 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_41.txt @@ -0,0 +1,5 @@ +As most LLMs use between 20 and 100 attention heads, MQA significantly reduces the memory consumption of the key-value cache. For the LLM used in this notebook we could therefore reduce the required memory consumption from 15 GB to less than 400 MB at an input sequence length of 16000. +In addition to memory savings, MQA also leads to improved computational efficiency as explained in the following. +In auto-regressive decoding, large key-value vectors need to be reloaded, concatenated with the current key-value vector pair to be then fed into the \( \mathbf{q}_c\mathbf{K}^T \) computation at every step. For auto-regressive decoding, the required memory bandwidth for the constant reloading can become a serious time bottleneck. By reducing the size of the key-value vectors less memory needs to be accessed, thus reducing the memory bandwidth bottleneck. For more detail, please have a look at Noam's paper. +The important part to understand here is that reducing the number of key-value attention heads to 1 only makes sense if a key-value cache is used. The peak memory consumption of the model for a single forward pass without key-value cache stays unchanged as every attention head still has a unique query vector so that each attention head still has a different \( \mathbf{QK}^T \) matrix. 
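The memory saving quoted above can be sketched with the same kind of back-of-the-envelope arithmetic used earlier. The layer, head and head-dimension values below are illustrative, chosen to reproduce the roughly 8 billion float figure computed above rather than read from an actual checkpoint.
python
def kv_cache_gb(seq_len, n_layer, n_kv_heads, head_dim, bytes_per_value=2):
    # the leading 2 accounts for storing both the keys and the values
    return 2 * seq_len * n_layer * n_kv_heads * head_dim * bytes_per_value / 1024**3

n_layer, n_head, head_dim = 40, 48, 128  # illustrative values

print(f"full multi-head cache: {kv_cache_gb(16_000, n_layer, n_head, head_dim):.1f} GB")    # ~14.6 GB, close to the 15 GB quoted above
print(f"MQA cache (1 kv head): {kv_cache_gb(16_000, n_layer, 1, head_dim) * 1024:.0f} MB")  # ~312 MB, i.e. well under 400 MB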
+MQA has seen wide adoption by the community and is now used by many of the most popular LLMs: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_42.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_42.txt new file mode 100644 index 0000000000000000000000000000000000000000..0b7a8b7115fec59689403dda268a91a029249c1d --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_42.txt @@ -0,0 +1,4 @@ +Falcon +PaLM +MPT +BLOOM \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_43.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_43.txt new file mode 100644 index 0000000000000000000000000000000000000000..07ba05a2d9382b5db1eb769b7157a2362db60ea5 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_43.txt @@ -0,0 +1,6 @@ +Also, the checkpoint used in this notebook - bigcode/octocoder - makes use of MQA. +3.2.3 Grouped-Query-Attention (GQA) +Grouped-Query-Attention, as proposed by Ainslie et al. from Google, found that using MQA can often lead to quality degradation compared to using vanilla multi-key-value head projections. The paper argues that more model performance can be kept by less drastically reducing the number of query head projection weights. Instead of using just a single key-value projection weight, n < n_head key-value projection weights should be used. By choosing n to a significantly smaller value than n_head, such as 2,4 or 8 almost all of the memory and speed gains from MQA can be kept while sacrificing less model capacity and thus arguably less performance. +Moreover, the authors of GQA found out that existing model checkpoints can be uptrained to have a GQA architecture with as little as 5% of the original pre-training compute. While 5% of the original pre-training compute can still be a massive amount, GQA uptraining allows existing checkpoints to be useful for longer input sequences. +GQA was only recently proposed which is why there is less adoption at the time of writing this notebook. +The most notable application of GQA is Llama-v2. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_44.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_44.txt new file mode 100644 index 0000000000000000000000000000000000000000..32fd9bc17386098dda83b0c8e00710c2e42d10e2 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_44.txt @@ -0,0 +1 @@ +As a conclusion, it is strongly recommended to make use of either GQA or MQA if the LLM is deployed with auto-regressive decoding and is required to handle large input sequences as is the case for example for chat. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_45.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_45.txt new file mode 100644 index 0000000000000000000000000000000000000000..e5bb79fe3d158cba255808b39e579a7434b434a9 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_45.txt @@ -0,0 +1,4 @@ +Conclusion +The research community is constantly coming up with new, nifty ways to speed up inference time for ever-larger LLMs. 
As an example, one such promising research direction is speculative decoding where "easy tokens" are generated by smaller, faster language models and only "hard tokens" are generated by the LLM itself. Going into more detail is out of the scope of this notebook, but can be read upon in this nice blog post. +The reason massive LLMs such as GPT3/4, Llama-2-70b, Claude, PaLM can run so quickly in chat-interfaces such as Hugging Face Chat or ChatGPT is to a big part thanks to the above-mentioned improvements in precision, algorithms, and architecture. +Going forward, accelerators such as GPUs, TPUs, etc will only get faster and allow for more memory, but one should nevertheless always make sure to use the best available algorithms and architectures to get the most bang for your buck 🤗 \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..178735546e32311243384a673d8a6cca7cb87359 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_5.txt @@ -0,0 +1,5 @@ +As of writing this document, the largest GPU chip on the market is the A100 & H100 offering 80GB of VRAM. Most of the models listed before require more than 80GB just to be loaded and therefore necessarily require tensor parallelism and/or pipeline parallelism. +🤗 Transformers does not support tensor parallelism out of the box as it requires the model architecture to be written in a specific way. If you're interested in writing models in a tensor-parallelism-friendly way, feel free to have a look at the text-generation-inference library. +Naive pipeline parallelism is supported out of the box. For this, simply load the model with device="auto" which will automatically place the different layers on the available GPUs as explained here. +Note, however that while very effective, this naive pipeline parallelism does not tackle the issues of GPU idling. For this more advanced pipeline parallelism is required as explained here. +If you have access to an 8 x 80GB A100 node, you could load BLOOM as follows \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..644b54d008d2403a17b54dabe313306aae365b5d --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_6.txt @@ -0,0 +1,4 @@ +!pip install transformers accelerate bitsandbytes optimum +thon +from transformers import AutoModelForCausalLM +model = AutoModelForCausalLM.from_pretrained("bigscience/bloom", device_map="auto", pad_token_id=0) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_7.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_7.txt new file mode 100644 index 0000000000000000000000000000000000000000..2a41a76faa5360c493ff4588e2ef65ddfac96f23 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_7.txt @@ -0,0 +1,10 @@ +By using device_map="auto" the attention layers would be equally distributed over all available GPUs. +In this guide, we will use bigcode/octocoder as it can be run on a single 40 GB A100 GPU device chip. 
Note that all memory and speed optimizations that we will apply going forward, are equally applicable to models that require model or tensor parallelism. +Since the model is loaded in bfloat16 precision, using our rule of thumb above, we would expect the memory requirement to run inference with bigcode/octocoder to be around 31 GB VRAM. Let's give it a try. +We first load the model and tokenizer and then pass both to Transformers' pipeline object. +thon +from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline +import torch +model = AutoModelForCausalLM.from_pretrained("bigcode/octocoder", torch_dtype=torch.bfloat16, device_map="auto", pad_token_id=0) +tokenizer = AutoTokenizer.from_pretrained("bigcode/octocoder") +pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_8.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_8.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc23e6606e8f0068021ec5e3e9d106d7ff7854cf --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_8.txt @@ -0,0 +1,15 @@ +thon +prompt = "Question: Please write a function in Python that transforms bytes to Giga bytes.\n\nAnswer:" +result = pipe(prompt, max_new_tokens=60)[0]["generated_text"][len(prompt):] +result + +Output: +Here is a Python function that transforms bytes to Giga bytes:\n\npython\ndef bytes_to_giga_bytes(bytes):\n return bytes / 1024 / 1024 / 1024\n\n\nThis function takes a single +Nice, we can now directly use the result to convert bytes into Gigabytes. +python +def bytes_to_giga_bytes(bytes): + return bytes / 1024 / 1024 / 1024 +Let's call torch.cuda.max_memory_allocated to measure the peak GPU memory allocation. +python +bytes_to_giga_bytes(torch.cuda.max_memory_allocated()) +Output: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_9.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_9.txt new file mode 100644 index 0000000000000000000000000000000000000000..2e2f27110449e48296545f042e02872858911d32 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_llm_tutorial_optimization.txt_chunk_9.txt @@ -0,0 +1,5 @@ +29.0260648727417 +Close enough to our back-of-the-envelope computation! We can see the number is not exactly correct as going from bytes to kilobytes requires a multiplication of 1024 instead of 1000. Therefore the back-of-the-envelope formula can also be understood as an "at most X GB" computation. +Note that if we had tried to run the model in full float32 precision, a whopping 64 GB of VRAM would have been required. + +Almost all models are trained in bfloat16 nowadays, there is no reason to run the model in full float32 precision if your GPU supports bfloat16. Float32 won't give better inference results than the precision that was used to train the model. 
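Before defaulting to bfloat16, it can be worth checking that the GPU actually supports it; a minimal sketch using PyTorch's capability check, falling back to float16 otherwise:
python
import torch
from transformers import AutoModelForCausalLM

# use bfloat16 only if the GPU supports it, otherwise fall back to float16
dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
model = AutoModelForCausalLM.from_pretrained("bigcode/octocoder", torch_dtype=dtype, device_map="auto", pad_token_id=0)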
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..8a2587c9a7331deccf8cf16c9457e3a0e08b9a28 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_0.txt @@ -0,0 +1,12 @@ +Model training anatomy +To understand performance optimization techniques that one can apply to improve efficiency of model training +speed and memory utilization, it's helpful to get familiar with how GPU is utilized during training, and how compute +intensity varies depending on an operation performed. +Let's start by exploring a motivating example of GPU utilization and the training run of a model. For the demonstration, +we'll need to install a few libraries: + +pip install transformers datasets accelerate nvidia-ml-py3 +The nvidia-ml-py3 library allows us to monitor the memory usage of the models from within Python. You might be familiar +with the nvidia-smi command in the terminal - this library allows to access the same information in Python directly. +Then, we create some dummy data: random token IDs between 100 and 30000 and binary labels for a classifier. +In total, we get 512 sequences each with length 512 and store them in a [~datasets.Dataset] with PyTorch format. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..6d97d1b4da5580ff5c1b8c42c1761193d668f496 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_1.txt @@ -0,0 +1,27 @@ +import numpy as np +from datasets import Dataset +seq_len, dataset_size = 512, 512 +dummy_data = { + "input_ids": np.random.randint(100, 30000, (dataset_size, seq_len)), + "labels": np.random.randint(0, 1, (dataset_size)), + } +ds = Dataset.from_dict(dummy_data) +ds.set_format("pt") + +To print summary statistics for the GPU utilization and the training run with the [Trainer] we define two helper functions: + +from pynvml import * +def print_gpu_utilization(): + nvmlInit() + handle = nvmlDeviceGetHandleByIndex(0) + info = nvmlDeviceGetMemoryInfo(handle) + print(f"GPU memory occupied: {info.used//1024**2} MB.") +def print_summary(result): + print(f"Time: {result.metrics['train_runtime']:.2f}") + print(f"Samples/second: {result.metrics['train_samples_per_second']:.2f}") + print_gpu_utilization() + +Let's verify that we start with a free GPU memory: + +print_gpu_utilization() +GPU memory occupied: 0 MB. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_10.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_10.txt new file mode 100644 index 0000000000000000000000000000000000000000..2672a09ed013ef76c7c606f7c15431152514e6c9 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_10.txt @@ -0,0 +1,19 @@ +There are the input and output that are being passed and returned by the forward and the backward functions and the +forward activations saved for gradient computation. +Temporary Memory +Additionally, there are all kinds of temporary variables which get released once the calculation is done, but in the +moment these could require additional memory and could push to OOM. 
Therefore, when coding it's crucial to think +strategically about such temporary variables and sometimes to explicitly free those as soon as they are no longer needed. +Functionality-specific memory +Then, your software could have special memory needs. For example, when generating text using beam search, the software +needs to maintain multiple copies of inputs and outputs. +forward vs backward Execution Speed +For convolutions and linear layers there are 2x flops in the backward compared to the forward, which generally translates +into ~2x slower (sometimes more, because sizes in the backward tend to be more awkward). Activations are usually +bandwidth-limited, and it’s typical for an activation to have to read more data in the backward than in the forward +(e.g. activation forward reads once, writes once, activation backward reads twice, gradOutput and output of the forward, +and writes once, gradInput). +As you can see, there are potentially a few places where we could save GPU memory or speed up operations. +Now that you understand what affects GPU utilization and computation speed, refer to +the Methods and tools for efficient training on a single GPU documentation page to learn about +performance optimization techniques. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..7218e8dac6ee6598e82b1f2b45621fded20dbab7 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_2.txt @@ -0,0 +1,17 @@ +print_gpu_utilization() +GPU memory occupied: 0 MB. + +That looks good: the GPU memory is not occupied as we would expect before we load any models. If that's not the case on +your machine make sure to stop all processes that are using GPU memory. However, not all free GPU memory can be used by +the user. When a model is loaded to the GPU the kernels are also loaded, which can take up 1-2GB of memory. To see how +much it is we load a tiny tensor into the GPU which triggers the kernels to be loaded as well. + +import torch +torch.ones((1, 1)).to("cuda") +print_gpu_utilization() +GPU memory occupied: 1343 MB. + +We see that the kernels alone take up 1.3GB of GPU memory. Now let's see how much space the model uses. +Load Model +First, we load the google-bert/bert-large-uncased model. We load the model weights directly to the GPU so that we can check +how much space just the weights use. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..31a377a9db9d165bc8e510914c99a16e9d88c377 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_3.txt @@ -0,0 +1,9 @@ +from transformers import AutoModelForSequenceClassification +model = AutoModelForSequenceClassification.from_pretrained("google-bert/bert-large-uncased").to("cuda") +print_gpu_utilization() +GPU memory occupied: 2631 MB. + +We can see that the model weights alone take up 1.3 GB of GPU memory. The exact number depends on the specific +GPU you are using. Note that on newer GPUs a model can sometimes take up more space since the weights are loaded in an +optimized fashion that speeds up the usage of the model. 
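As a rough sanity check of that ~1.3 GB figure, one can count the parameters of the model loaded above and multiply by 4 bytes per fp32 weight (a sketch; the exact footprint still varies by GPU as noted above):

n_params = sum(p.numel() for p in model.parameters())  # ~336M for bert-large-uncased
print(f"{n_params * 4 / 1024**3:.2f} GB")  # ~1.25 GB of fp32 weights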
Now we can also quickly check if we get the same result +as with nvidia-smi CLI: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..8b6c790d656e641c89d98aa2608256c78d1216f3 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_4.txt @@ -0,0 +1,21 @@ +nvidia-smi +```bash +Tue Jan 11 08:58:05 2022 ++-----------------------------------------------------------------------------+ +| NVIDIA-SMI 460.91.03 Driver Version: 460.91.03 CUDA Version: 11.2 | +|-------------------------------+----------------------+----------------------+ +| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC | +| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. | +| | | MIG M. | +|===============================+======================+======================| +| 0 Tesla V100-SXM2 On | 00000000:00:04.0 Off | 0 | +| N/A 37C P0 39W / 300W | 2631MiB / 16160MiB | 0% Default | +| | | N/A | ++-------------------------------+----------------------+----------------------+ ++-----------------------------------------------------------------------------+ +| Processes: | +| GPU GI CI PID Type Process name GPU Memory | +| ID ID Usage | +|=============================================================================| +| 0 N/A N/A 3721 C nvs/codeparrot/bin/python 2629MiB | ++-----------------------------------------------------------------------------+ \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..e671841fcc9ce23020f1fc515f36e86e87a682fe --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_5.txt @@ -0,0 +1,17 @@ +We get the same number as before and you can also see that we are using a V100 GPU with 16GB of memory. So now we can +start training the model and see how the GPU memory consumption changes. First, we set up a few standard training +arguments: +py +default_args = { + "output_dir": "tmp", + "eval_strategy": "steps", + "num_train_epochs": 1, + "log_level": "error", + "report_to": "none", +} + +If you plan to run multiple experiments, in order to properly clear the memory between experiments, restart the Python + kernel between experiments. + +Memory utilization at vanilla training +Let's use the [Trainer] and train the model without using any GPU performance optimization techniques and a batch size of 4: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..bab50b8f148b77ceecbf16ff723b2ac2028eb953 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_6.txt @@ -0,0 +1,16 @@ +from transformers import TrainingArguments, Trainer, logging +logging.set_verbosity_error() +training_args = TrainingArguments(per_device_train_batch_size=4, **default_args) +trainer = Trainer(model=model, args=training_args, train_dataset=ds) +result = trainer.train() +print_summary(result) + +Time: 57.82 +Samples/second: 8.86 +GPU memory occupied: 14949 MB. 
+We see that already a relatively small batch size almost fills up our GPU's entire memory. However, a larger batch size +can often result in faster model convergence or better end performance. So ideally we want to tune the batch size to our +model's needs and not to the GPU limitations. What's interesting is that we use much more memory than the size of the model. +To understand a bit better why this is the case let's have a look at a model's operations and memory needs. +Anatomy of Model's Operations +Transformers architecture includes 3 main groups of operations grouped below by compute-intensity. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_7.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_7.txt new file mode 100644 index 0000000000000000000000000000000000000000..12d0763085a72534a0d2177780afc23b73cc7cb4 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_7.txt @@ -0,0 +1,8 @@ +Tensor Contractions +Linear layers and components of Multi-Head Attention all do batched matrix-matrix multiplications. These operations are the most compute-intensive part of training a transformer. + +Statistical Normalizations +Softmax and layer normalization are less compute-intensive than tensor contractions, and involve one or more reduction operations, the result of which is then applied via a map. + +Element-wise Operators +These are the remaining operators: biases, dropout, activations, and residual connections. These are the least compute-intensive operations. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_8.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_8.txt new file mode 100644 index 0000000000000000000000000000000000000000..ce2a69938ce851ed555be54f019c4254525c85cf --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_8.txt @@ -0,0 +1,18 @@ +This knowledge can be helpful to know when analyzing performance bottlenecks. +This summary is derived from Data Movement Is All You Need: A Case Study on Optimizing Transformers 2020 +Anatomy of Model's Memory +We've seen that training the model uses much more memory than just putting the model on the GPU. This is because there +are many components during training that use GPU memory. The components on GPU memory are the following: + +model weights +optimizer states +gradients +forward activations saved for gradient computation +temporary buffers +functionality-specific memory + +A typical model trained in mixed precision with AdamW requires 18 bytes per model parameter plus activation memory. For +inference there are no optimizer states and gradients, so we can subtract those. And thus we end up with 6 bytes per +model parameter for mixed precision inference, plus activation memory. +Let's look at the details. 
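As a quick cross-check of the 18-byte and 6-byte figures, here is the arithmetic using the per-component costs itemized in the breakdown that follows:

weights = 4 + 2      # fp32 master copy + fp16 working copy of the weights
adamw_states = 8     # AdamW keeps two fp32 states per parameter
gradients = 4        # gradients are kept in fp32
print(weights + adamw_states + gradients)  # 18 bytes per parameter for mixed precision training
print(weights)                             # 6 bytes per parameter for mixed precision inference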
+Model Weights: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_9.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_9.txt new file mode 100644 index 0000000000000000000000000000000000000000..d6c7aeb092d3f61d0b98091e13b2d944cf94ca52 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_memory_anatomy.txt_chunk_9.txt @@ -0,0 +1,16 @@ +4 bytes * number of parameters for fp32 training +6 bytes * number of parameters for mixed precision training (maintains a model in fp32 and one in fp16 in memory) + +Optimizer States: + +8 bytes * number of parameters for normal AdamW (maintains 2 states) +2 bytes * number of parameters for 8-bit AdamW optimizers like bitsandbytes +4 bytes * number of parameters for optimizers like SGD with momentum (maintains only 1 state) + +Gradients + +4 bytes * number of parameters for either fp32 or mixed precision training (gradients are always kept in fp32) + +Forward Activations + +size depends on many factors, the key ones being sequence length, hidden size and batch size. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..0520b55031a1fbc9de442dfffe647e1cdd58c870 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_0.txt @@ -0,0 +1,8 @@ +Share a model +The last two tutorials showed how you can fine-tune a model with PyTorch, Keras, and 🤗 Accelerate for distributed setups. The next step is to share your model with the community! At Hugging Face, we believe in openly sharing knowledge and resources to democratize artificial intelligence for everyone. We encourage you to consider sharing your model with the community to help others save time and resources. +In this tutorial, you will learn two methods for sharing a trained or fine-tuned model on the Model Hub: + +Programmatically push your files to the Hub. +Drag-and-drop your files to the Hub with the web interface. + +To share a model with the community, you need an account on huggingface.co. You can also join an existing organization or create a new one. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..435b1ae7e3c91f1c90b3d01674206c3f3d8b0623 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_1.txt @@ -0,0 +1,10 @@ +Repository features +Each repository on the Model Hub behaves like a typical GitHub repository. Our repositories offer versioning, commit history, and the ability to visualize differences. +The Model Hub's built-in versioning is based on git and git-lfs. In other words, you can treat one model as one repository, enabling greater access control and scalability. Version control allows revisions, a method for pinning a specific version of a model with a commit hash, tag or branch. 
+As a result, you can load a specific model version with the revision parameter: + +model = AutoModel.from_pretrained( + "julien-c/EsperBERTo-small", revision="v2.0.1" # tag name, or branch name, or commit hash + ) + +Files are also easily edited in a repository, and you can view the commit history as well as the difference: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..382480221bf0a902ad1db770e0255dd9e73f6456 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_2.txt @@ -0,0 +1,11 @@ +Setup +Before sharing a model to the Hub, you will need your Hugging Face credentials. If you have access to a terminal, run the following command in the virtual environment where 🤗 Transformers is installed. This will store your access token in your Hugging Face cache folder (~/.cache/ by default): + +huggingface-cli login +If you are using a notebook like Jupyter or Colaboratory, make sure you have the huggingface_hub library installed. This library allows you to programmatically interact with the Hub. + +pip install huggingface_hub +Then use notebook_login to sign-in to the Hub, and follow the link here to generate a token to login with: + +from huggingface_hub import notebook_login +notebook_login() \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..bf266981e953c801aa0f518273306d0e580c760c --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_3.txt @@ -0,0 +1,8 @@ +from huggingface_hub import notebook_login +notebook_login() + +Convert a model for all frameworks +To ensure your model can be used by someone working with a different framework, we recommend you convert and upload your model with both PyTorch and TensorFlow checkpoints. While users are still able to load your model from a different framework if you skip this step, it will be slower because 🤗 Transformers will need to convert the checkpoint on-the-fly. +Converting a checkpoint for another framework is easy. Make sure you have PyTorch and TensorFlow installed (see here for installation instructions), and then find the specific model for your task in the other framework. 
+ +Specify from_tf=True to convert a checkpoint from TensorFlow to PyTorch: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..edb19056596da59101ec7bd2b75f8e26e93d2738 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_4.txt @@ -0,0 +1,22 @@ +Specify from_tf=True to convert a checkpoint from TensorFlow to PyTorch: + +pt_model = DistilBertForSequenceClassification.from_pretrained("path/to/awesome-name-you-picked", from_tf=True) +pt_model.save_pretrained("path/to/awesome-name-you-picked") + +Specify from_pt=True to convert a checkpoint from PyTorch to TensorFlow: + +tf_model = TFDistilBertForSequenceClassification.from_pretrained("path/to/awesome-name-you-picked", from_pt=True) + +Then you can save your new TensorFlow model with its new checkpoint: + +tf_model.save_pretrained("path/to/awesome-name-you-picked") + +If a model is available in Flax, you can also convert a checkpoint from PyTorch to Flax: + +flax_model = FlaxDistilBertForSequenceClassification.from_pretrained( + "path/to/awesome-name-you-picked", from_pt=True + ) + +Push a model during training \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..ff7c99e4734e9e0fd57b5e52fd4261e0801b9045 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_5.txt @@ -0,0 +1,17 @@ +Push a model during training + +Sharing a model to the Hub is as simple as adding an extra parameter or callback. Remember from the fine-tuning tutorial that the [TrainingArguments] class is where you specify hyperparameters and additional training options. One of these training options is the ability to push a model directly to the Hub. Set push_to_hub=True in your [TrainingArguments]: + +training_args = TrainingArguments(output_dir="my-awesome-model", push_to_hub=True) + +Pass your training arguments as usual to [Trainer]: + +trainer = Trainer( + model=model, + args=training_args, + train_dataset=small_train_dataset, + eval_dataset=small_eval_dataset, + compute_metrics=compute_metrics, + ) + +After you fine-tune your model, call [~transformers.Trainer.push_to_hub] on [Trainer] to push the trained model to the Hub. 🤗 Transformers will even automatically add training hyperparameters, training results and framework versions to your model card! \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..799e5561f024e7c7097731e94802d148b902a673 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_6.txt @@ -0,0 +1,26 @@ +trainer.push_to_hub() + +Share a model to the Hub with [PushToHubCallback]. In the [PushToHubCallback] function, add: + +An output directory for your model. +A tokenizer. +The hub_model_id, which is your Hub username and model name.
+ +from transformers import PushToHubCallback +push_to_hub_callback = PushToHubCallback( + output_dir="./your_model_save_path", tokenizer=tokenizer, hub_model_id="your-username/my-awesome-model" + ) + +Add the callback to fit, and 🤗 Transformers will push the trained model to the Hub: + +model.fit(tf_train_dataset, validation_data=tf_validation_dataset, epochs=3, callbacks=push_to_hub_callback) + +Use the push_to_hub function +You can also call push_to_hub directly on your model to upload it to the Hub. +Specify your model name in push_to_hub: + +pt_model.push_to_hub("my-awesome-model") + +This creates a repository under your username with the model name my-awesome-model. Users can now load your model with the from_pretrained function: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_7.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_7.txt new file mode 100644 index 0000000000000000000000000000000000000000..7a11d4dfc90524f5841de681ee5e3cce7abaadcb --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_7.txt @@ -0,0 +1,14 @@ +from transformers import AutoModel +model = AutoModel.from_pretrained("your_username/my-awesome-model") + +If you belong to an organization and want to push your model under the organization name instead, just add it to the repo_id: + +pt_model.push_to_hub("my-awesome-org/my-awesome-model") + +The push_to_hub function can also be used to add other files to a model repository. For example, add a tokenizer to a model repository: + +tokenizer.push_to_hub("my-awesome-model") + +Or perhaps you'd like to add the TensorFlow version of your fine-tuned PyTorch model: + +tf_model.push_to_hub("my-awesome-model") \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_8.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_8.txt new file mode 100644 index 0000000000000000000000000000000000000000..5e434d9306e2beb9a2ca5689018284ad06dbfa7f --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_8.txt @@ -0,0 +1,15 @@ +tf_model.push_to_hub("my-awesome-model") + +Now when you navigate to your Hugging Face profile, you should see your newly created model repository. Clicking on the Files tab will display all the files you've uploaded to the repository. +For more details on how to create and upload files to a repository, refer to the Hub documentation here. +Upload with the web interface +Users who prefer a no-code approach are able to upload a model through the Hub's web interface. Visit huggingface.co/new to create a new repository: + +From here, add some information about your model: + +Select the owner of the repository. This can be yourself or any of the organizations you belong to. +Pick a name for your model, which will also be the repository name. +Choose whether your model is public or private. +Specify the license usage for your model. + +Now click on the Files tab and click on the Add file button to upload a new file to your repository. Then drag-and-drop a file to upload and add a commit message. 
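If you'd rather script the upload than use drag-and-drop, the same result can be achieved with the huggingface_hub client; a minimal sketch (the file path and repository id are placeholders):

from huggingface_hub import HfApi

api = HfApi()
api.upload_file(
    path_or_fileobj="./README.md",             # local file to upload (placeholder)
    path_in_repo="README.md",                  # destination path inside the repo
    repo_id="your-username/my-awesome-model",  # placeholder repository id
    commit_message="Add model card",
)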
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_9.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_9.txt new file mode 100644 index 0000000000000000000000000000000000000000..8598f9c0299952e661da2fe5dfda7af5601008e5 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_sharing.txt_chunk_9.txt @@ -0,0 +1,7 @@ +Add a model card +To make sure users understand your model's capabilities, limitations, potential biases and ethical considerations, please add a model card to your repository. The model card is defined in the README.md file. You can add a model card by: + +Manually creating and uploading a README.md file. +Clicking on the Edit model card button in your model repository. + +Take a look at the DistilBert model card for a good example of the type of information a model card should include. For more details about other options you can control in the README.md file such as a model's carbon footprint or widget examples, refer to the documentation here. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_summary.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_summary.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..f23f0fa791794ed4974aa1e4f33b176a6c152262 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_summary.txt_chunk_0.txt @@ -0,0 +1,3 @@ +The Transformer model family +Since its introduction in 2017, the original Transformer model (see the Annotated Transformer blog post for a gentle technical introduction) has inspired many new and exciting models that extend beyond natural language processing (NLP) tasks. There are models for predicting the folded structure of proteins, training a cheetah to run, and time series forecasting. With so many Transformer variants available, it can be easy to miss the bigger picture. What all these models have in common is they're based on the original Transformer architecture. Some models only use the encoder or decoder, while others use both. This provides a useful taxonomy to categorize and examine the high-level differences within models in the Transformer family, and it'll help you understand Transformers you haven't encountered before. +If you aren't familiar with the original Transformer model or need a refresher, check out the How do Transformers work chapter from the Hugging Face course. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_summary.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_summary.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..43ffa4ebc43d944ba96367841a224fcade327484 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_summary.txt_chunk_1.txt @@ -0,0 +1 @@ +Computer vision \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_summary.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_summary.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..bdf69347b299c791e0a2c6b332cd893ff7f071ca --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_summary.txt_chunk_2.txt @@ -0,0 +1,11 @@ +Convolutional network +For a long time, convolutional networks (CNNs) were the dominant paradigm for computer vision tasks until the Vision Transformer demonstrated its scalability and efficiency. 
Even then, some of a CNN's best qualities, like translation invariance, are so powerful (especially for certain tasks) that some Transformers incorporate convolutions in their architecture. ConvNeXt flipped this exchange around and incorporated design choices from Transformers to modernize a CNN. For example, ConvNeXt uses non-overlapping sliding windows to patchify an image and a larger kernel to increase its global receptive field. ConvNeXt also makes several layer design choices to be more memory-efficient and improve performance, so it competes favorably with Transformers! +Encoder[[cv-encoder]] +The Vision Transformer (ViT) opened the door to computer vision tasks without convolutions. ViT uses a standard Transformer encoder, but its main breakthrough was how it treated an image. It splits an image into fixed-size patches and uses them to create an embedding, just like how a sentence is split into tokens. ViT capitalized on the Transformers' efficient architecture to demonstrate competitive results with the CNNs at the time while requiring fewer resources to train. ViT was soon followed by other vision models that could also handle dense vision tasks like segmentation as well as detection. +One of these models is the Swin Transformer. It builds hierarchical feature maps (like a CNN 👀 and unlike ViT) from smaller-sized patches and merges them with neighboring patches in deeper layers. Attention is only computed within a local window, and the window is shifted between attention layers to create connections to help the model learn better. Since the Swin Transformer can produce hierarchical feature maps, it is a good candidate for dense prediction tasks like segmentation and detection. The SegFormer also uses a Transformer encoder to build hierarchical feature maps, but it adds a simple multilayer perceptron (MLP) decoder on top to combine all the feature maps and make a prediction. +Other vision models, like BeIT and ViTMAE, drew inspiration from BERT's pretraining objective. BeIT is pretrained by masked image modeling (MIM); the image patches are randomly masked, and the image is also tokenized into visual tokens. BeIT is trained to predict the visual tokens corresponding to the masked patches. ViTMAE has a similar pretraining objective, except it must predict the pixels instead of visual tokens. What's unusual is 75% of the image patches are masked! The decoder reconstructs the pixels from the masked tokens and encoded patches. After pretraining, the decoder is thrown away, and the encoder is ready to be used in downstream tasks. +Decoder[[cv-decoder]] +Decoder-only vision models are rare because most vision models rely on an encoder to learn an image representation. But for use cases like image generation, the decoder is a natural fit, as we've seen from text generation models like GPT-2. ImageGPT uses the same architecture as GPT-2, but instead of predicting the next token in a sequence, it predicts the next pixel in an image. In addition to image generation, ImageGPT could also be finetuned for image classification. +Encoder-decoder[[cv-encoder-decoder]] +Vision models commonly use an encoder (also known as a backbone) to extract important image features before passing them to a Transformer decoder. DETR has a pretrained backbone, but it also uses the complete Transformer encoder-decoder architecture for object detection. 
The encoder learns image representations and combines them with object queries (each object query is a learned embedding that focuses on a region or object in an image) in the decoder. DETR predicts the bounding box coordinates and class label for each object query. +Natural language processing \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_summary.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_summary.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..eaa7831319b5f0db38043c15dc7739d9fa9ba527 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_summary.txt_chunk_3.txt @@ -0,0 +1,10 @@ +Encoder[[nlp-encoder]] +BERT is an encoder-only Transformer that randomly masks certain tokens in the input to avoid seeing other tokens, which would allow it to "cheat". The pretraining objective is to predict the masked token based on the context. This allows BERT to fully use the left and right contexts to help it learn a deeper and richer representation of the inputs. However, there was still room for improvement in BERT's pretraining strategy. RoBERTa improved upon this by introducing a new pretraining recipe that includes training for longer and on larger batches, randomly masking tokens at each epoch instead of just once during preprocessing, and removing the next-sentence prediction objective. +The dominant strategy to improve performance is to increase the model size. But training large models is computationally expensive. One way to reduce computational costs is using a smaller model like DistilBERT. DistilBERT uses knowledge distillation - a compression technique - to create a smaller version of BERT while keeping nearly all of its language understanding capabilities. +However, most Transformer models continued to trend towards more parameters, leading to new models focused on improving training efficiency. ALBERT reduces memory consumption by lowering the number of parameters in two ways: separating the larger vocabulary embedding into two smaller matrices and allowing layers to share parameters. DeBERTa added a disentangled attention mechanism where the word and its position are separately encoded in two vectors. The attention is computed from these separate vectors instead of a single vector containing the word and position embeddings. Longformer also focused on making attention more efficient, especially for processing documents with longer sequence lengths. It uses a combination of local windowed attention (attention only calculated from fixed window size around each token) and global attention (only for specific task tokens like [CLS] for classification) to create a sparse attention matrix instead of a full attention matrix. +Decoder[[nlp-decoder]] +GPT-2 is a decoder-only Transformer that predicts the next word in the sequence. It masks tokens to the right so the model can't "cheat" by looking ahead. By pretraining on a massive body of text, GPT-2 became really good at generating text, even if the text is only sometimes accurate or true. But GPT-2 lacked the bidirectional context from BERT's pretraining, which made it unsuitable for certain tasks. XLNET combines the best of both BERT and GPT-2's pretraining objectives by using a permutation language modeling objective (PLM) that allows it to learn bidirectionally. +After GPT-2, language models grew even bigger and are now known as large language models (LLMs). 
LLMs demonstrate few- or even zero-shot learning if pretrained on a large enough dataset. GPT-J is an LLM with 6B parameters and trained on 400B tokens. GPT-J was followed by OPT, a family of decoder-only models, the largest of which is 175B and trained on 180B tokens. BLOOM was released around the same time, and the largest model in the family has 176B parameters and is trained on 366B tokens in 46 languages and 13 programming languages. +Encoder-decoder[[nlp-encoder-decoder]] +BART keeps the original Transformer architecture, but it modifies the pretraining objective with text infilling corruption, where some text spans are replaced with a single mask token. The decoder predicts the uncorrupted tokens (future tokens are masked) and uses the encoder's hidden states to help it. Pegasus is similar to BART, but Pegasus masks entire sentences instead of text spans. In addition to masked language modeling, Pegasus is pretrained by gap sentence generation (GSG). The GSG objective masks whole sentences important to a document, replacing them with a mask token. The decoder must generate the output from the remaining sentences. T5 is a more unique model that casts all NLP tasks into a text-to-text problem using specific prefixes. For example, the prefix Summarize: indicates a summarization task. T5 is pretrained by supervised (GLUE and SuperGLUE) training and self-supervised training (randomly sample and drop out 15% of tokens). +Audio \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_summary.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_summary.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..246f3acfd40a721b54b7d354f8dad610eab8849d --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_summary.txt_chunk_4.txt @@ -0,0 +1,5 @@ +Encoder[[audio-encoder]] +Wav2Vec2 uses a Transformer encoder to learn speech representations directly from raw audio waveforms. It is pretrained with a contrastive task to determine the true speech representation from a set of false ones. HuBERT is similar to Wav2Vec2 but has a different training process. Target labels are created by a clustering step in which segments of similar audio are assigned to a cluster which becomes a hidden unit. The hidden unit is mapped to an embedding to make a prediction. +Encoder-decoder[[audio-encoder-decoder]] +Speech2Text is a speech model designed for automatic speech recognition (ASR) and speech translation. The model accepts log mel-filter bank features extracted from the audio waveform and pretrained autoregressively to generate a transcript or translation. Whisper is also an ASR model, but unlike many other speech models, it is pretrained on a massive amount of ✨ labeled ✨ audio transcription data for zero-shot performance. A large chunk of the dataset also contains non-English languages, meaning Whisper can also be used for low-resource languages. Structurally, Whisper is similar to Speech2Text. The audio signal is converted to a log-mel spectrogram encoded by the encoder. The decoder generates the transcript autoregressively from the encoder's hidden states and the previous tokens. 
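As a minimal sketch of the zero-shot transcription described above (the small Whisper checkpoint and the audio file name are placeholder choices):

from transformers import pipeline

asr = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
print(asr("sample.wav")["text"])  # "sample.wav" is a placeholder audio file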
+Multimodal \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_summary.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_summary.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..669ff160c87f053b9bd2014eea399688c52637cb --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_summary.txt_chunk_5.txt @@ -0,0 +1,6 @@ +Encoder[[mm-encoder]] +VisualBERT is a multimodal model for vision-language tasks released shortly after BERT. It combines BERT and a pretrained object detection system to extract image features into visual embeddings, passed alongside text embeddings to BERT. VisualBERT predicts the masked text based on the unmasked text and the visual embeddings, and it also has to predict whether the text is aligned with the image. When ViT was released, ViLT adopted ViT in its architecture because it was easier to get the image embeddings this way. The image embeddings are jointly processed with the text embeddings. From there, ViLT is pretrained by image text matching, masked language modeling, and whole word masking. +CLIP takes a different approach and makes a pair prediction of (image, text) . An image encoder (ViT) and a text encoder (Transformer) are jointly trained on a 400 million (image, text) pair dataset to maximize the similarity between the image and text embeddings of the (image, text) pairs. After pretraining, you can use natural language to instruct CLIP to predict the text given an image or vice versa. OWL-ViT builds on top of CLIP by using it as its backbone for zero-shot object detection. After pretraining, an object detection head is added to make a set prediction over the (class, bounding box) pairs. +Encoder-decoder[[mm-encoder-decoder]] +Optical character recognition (OCR) is a long-standing text recognition task that typically involves several components to understand the image and generate the text. TrOCR simplifies the process using an end-to-end Transformer. The encoder is a ViT-style model for image understanding and processes the image as fixed-size patches. The decoder accepts the encoder's hidden states and autoregressively generates text. Donut is a more general visual document understanding model that doesn't rely on OCR-based approaches. It uses a Swin Transformer as the encoder and multilingual BART as the decoder. Donut is pretrained to read text by predicting the next word based on the image and text annotations. The decoder generates a token sequence given a prompt. The prompt is represented by a special token for each downstream task. For example, document parsing has a special parsing token that is combined with the encoder hidden states to parse the document into a structured output format (JSON). +Reinforcement learning \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_model_summary.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_summary.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..a20ad9ed9b0732a11344edeb865119e3c4d7662e --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_model_summary.txt_chunk_6.txt @@ -0,0 +1,2 @@ +Decoder[[rl-decoder]] +The Decision and Trajectory Transformer casts the state, action, and reward as a sequence modeling problem. The Decision Transformer generates a series of actions that lead to a future desired return based on returns-to-go, past states, and actions. 
For the last K timesteps, each of the three modalities are converted into token embeddings and processed by a GPT-like model to predict a future action token. Trajectory Transformer also tokenizes the states, actions, and rewards and processes them with a GPT architecture. Unlike the Decision Transformer, which is focused on reward conditioning, the Trajectory Transformer generates future actions with beam search. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..367598457b9ce8f2da951b7606a02baa4c6ab2b2 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_0.txt @@ -0,0 +1,7 @@ +Multilingual models for inference +[[open-in-colab]] +There are several multilingual models in 🤗 Transformers, and their inference usage differs from monolingual models. Not all multilingual model usage is different though. Some models, like google-bert/bert-base-multilingual-uncased, can be used just like a monolingual model. This guide will show you how to use multilingual models whose usage differs for inference. +XLM +XLM has ten different checkpoints, only one of which is monolingual. The nine remaining model checkpoints can be split into two categories: the checkpoints that use language embeddings and those that don't. +XLM with language embeddings +The following XLM models use language embeddings to specify the language used at inference: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..7498b2927b7c07366a4177d5fb9ee838c5602697 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_1.txt @@ -0,0 +1,10 @@ +FacebookAI/xlm-mlm-ende-1024 (Masked language modeling, English-German) +FacebookAI/xlm-mlm-enfr-1024 (Masked language modeling, English-French) +FacebookAI/xlm-mlm-enro-1024 (Masked language modeling, English-Romanian) +FacebookAI/xlm-mlm-xnli15-1024 (Masked language modeling, XNLI languages) +FacebookAI/xlm-mlm-tlm-xnli15-1024 (Masked language modeling + translation, XNLI languages) +FacebookAI/xlm-clm-enfr-1024 (Causal language modeling, English-French) +FacebookAI/xlm-clm-ende-1024 (Causal language modeling, English-German) + +Language embeddings are represented as a tensor of the same shape as the input_ids passed to the model. The values in these tensors depend on the language used and are identified by the tokenizer's lang2id and id2lang attributes. 
+In this example, load the FacebookAI/xlm-clm-enfr-1024 checkpoint (Causal language modeling, English-French): \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..4176e1b9d67f69346c6151da922b24249ae05c12 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_2.txt @@ -0,0 +1,20 @@ +import torch +from transformers import XLMTokenizer, XLMWithLMHeadModel +tokenizer = XLMTokenizer.from_pretrained("FacebookAI/xlm-clm-enfr-1024") +model = XLMWithLMHeadModel.from_pretrained("FacebookAI/xlm-clm-enfr-1024") + +The lang2id attribute of the tokenizer displays this model's languages and their ids: + +print(tokenizer.lang2id) +{'en': 0, 'fr': 1} + +Next, create an example input: + +input_ids = torch.tensor([tokenizer.encode("Wikipedia was used to")]) # batch size of 1 + +Set the language id as "en" and use it to define the language embedding. The language embedding is a tensor filled with 0 since that is the language id for English. This tensor should be the same size as input_ids. + +language_id = tokenizer.lang2id["en"] # 0 +langs = torch.tensor([language_id] * input_ids.shape[1]) # torch.tensor([0, 0, 0, , 0]) +We reshape it to be of size (batch_size, sequence_length) +langs = langs.view(1, -1) # is now of shape [1, sequence_length] (we have a batch size of 1) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..b665969ed97be971d87b518a3cb15e30457b8e9d --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_3.txt @@ -0,0 +1,17 @@ +Now you can pass the input_ids and language embedding to the model: + +outputs = model(input_ids, langs=langs) + +The run_generation.py script can generate text with language embeddings using the xlm-clm checkpoints. +XLM without language embeddings +The following XLM models do not require language embeddings during inference: + +FacebookAI/xlm-mlm-17-1280 (Masked language modeling, 17 languages) +FacebookAI/xlm-mlm-100-1280 (Masked language modeling, 100 languages) + +These models are used for generic sentence representations, unlike the previous XLM checkpoints. +BERT +The following BERT models can be used for multilingual tasks: + +google-bert/bert-base-multilingual-uncased (Masked language modeling + Next sentence prediction, 102 languages) +google-bert/bert-base-multilingual-cased (Masked language modeling + Next sentence prediction, 104 languages) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..1c29b7ded79eac49abf96140bc568b4ad893ddda --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_4.txt @@ -0,0 +1,16 @@ +These models do not require language embeddings during inference. They should identify the language from the +context and infer accordingly. 
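As a minimal sketch of using one of these multilingual BERT checkpoints exactly like a monolingual model (the example sentence is arbitrary):

from transformers import pipeline

# no language embeddings needed - the model infers the language from context
fill_mask = pipeline("fill-mask", model="google-bert/bert-base-multilingual-cased")
print(fill_mask("Paris is the [MASK] of France.")[0]["token_str"])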
+XLM-RoBERTa +The following XLM-RoBERTa models can be used for multilingual tasks: + +FacebookAI/xlm-roberta-base (Masked language modeling, 100 languages) +FacebookAI/xlm-roberta-large (Masked language modeling, 100 languages) + +XLM-RoBERTa was trained on 2.5TB of newly created and cleaned CommonCrawl data in 100 languages. It provides strong gains over previously released multilingual models like mBERT or XLM on downstream tasks like classification, sequence labeling, and question answering. +M2M100 +The following M2M100 models can be used for multilingual translation: + +facebook/m2m100_418M (Translation) +facebook/m2m100_1.2B (Translation) + +In this example, load the facebook/m2m100_418M checkpoint to translate from Chinese to English. You can set the source language in the tokenizer: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..4d2990fa40d2460b1e6f352901c9efc03eb133ba --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_5.txt @@ -0,0 +1,18 @@ +from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer +en_text = "Do not meddle in the affairs of wizards, for they are subtle and quick to anger." +chinese_text = "ä¸è¦æ’手巫師的事務, 因為他們是微妙的, 很快就會發怒." +tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M", src_lang="zh") +model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M") + +Tokenize the text: + +encoded_zh = tokenizer(chinese_text, return_tensors="pt") + +M2M100 forces the target language id as the first generated token to translate to the target language. Set the forced_bos_token_id to en in the generate method to translate to English: + +generated_tokens = model.generate(**encoded_zh, forced_bos_token_id=tokenizer.get_lang_id("en")) +tokenizer.batch_decode(generated_tokens, skip_special_tokens=True) +'Do not interfere with the matters of the witches, because they are delicate and will soon be angry.' + +MBart +The following MBart models can be used for multilingual translation: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..d6a0c0a47369dfb40bfc1d6fcab59162b170ea7a --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_6.txt @@ -0,0 +1,10 @@ +MBart +The following MBart models can be used for multilingual translation: + +facebook/mbart-large-50-one-to-many-mmt (One-to-many multilingual machine translation, 50 languages) +facebook/mbart-large-50-many-to-many-mmt (Many-to-many multilingual machine translation, 50 languages) +facebook/mbart-large-50-many-to-one-mmt (Many-to-one multilingual machine translation, 50 languages) +facebook/mbart-large-50 (Multilingual translation, 50 languages) +facebook/mbart-large-cc25 + +In this example, load the facebook/mbart-large-50-many-to-many-mmt checkpoint to translate Finnish to English. 
You can set the source language in the tokenizer: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_7.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_7.txt new file mode 100644 index 0000000000000000000000000000000000000000..facfb76d992c8420a286b14aa0391a2c2b36e8ed --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_7.txt @@ -0,0 +1,15 @@ +from transformers import AutoTokenizer, AutoModelForSeq2SeqLM +en_text = "Do not meddle in the affairs of wizards, for they are subtle and quick to anger." +fi_text = "Älä sekaannu velhojen asioihin, sillä ne ovat hienovaraisia ja nopeasti vihaisia." +tokenizer = AutoTokenizer.from_pretrained("facebook/mbart-large-50-many-to-many-mmt", src_lang="fi_FI") +model = AutoModelForSeq2SeqLM.from_pretrained("facebook/mbart-large-50-many-to-many-mmt") + +Tokenize the Finnish text: + +encoded_fi = tokenizer(fi_text, return_tensors="pt") + +MBart forces the target language id as the first generated token to translate to the target language. Set the forced_bos_token_id to en_XX in the generate method to translate to English: + +generated_tokens = model.generate(**encoded_fi, forced_bos_token_id=tokenizer.lang_code_to_id["en_XX"]) +tokenizer.batch_decode(generated_tokens, skip_special_tokens=True) +"Don't interfere with the wizard's affairs, because they are subtle, will soon get angry." \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_8.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_8.txt new file mode 100644 index 0000000000000000000000000000000000000000..ca4ca1ddefff73060cf178a4bf86b3ac17905118 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_multilingual.txt_chunk_8.txt @@ -0,0 +1 @@ +If you are using the facebook/mbart-large-50-many-to-one-mmt checkpoint, you don't need to force the target language id as the first generated token; otherwise, the usage is the same. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_notebooks.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_notebooks.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..504f04ccbe55506269bfdf8ef52e645c1fe26e15 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_notebooks.txt_chunk_0.txt @@ -0,0 +1,102 @@ +🤗 Transformers Notebooks +Here you can find a list of the official notebooks provided by Hugging Face. +We would also like to list interesting content created by the community here. +If you wrote some notebook(s) leveraging 🤗 Transformers and would like to be listed here, please open a +Pull Request so it can be included under the Community notebooks.
+Hugging Face's notebooks 🤗 +Documentation notebooks +You can open any page of the documentation as a notebook in Colab (there is a button directly on said pages) but they are also listed here if you need them: +| Notebook | Description | | | +|:----------|:-------------|:-------------|------:| +| Quicktour of the library | A presentation of the various APIs in Transformers || | +| Summary of the tasks | How to run the models of the Transformers library task by task || | +| Preprocessing data | How to use a tokenizer to preprocess your data || | +| Fine-tuning a pretrained model | How to use the Trainer to fine-tune a pretrained model || | +| Summary of the tokenizers | The differences between the tokenizers algorithm || | +| Multilingual models | How to use the multilingual models of the library || | +PyTorch Examples +Natural Language Processing[[pytorch-nlp]] +| Notebook | Description | | | +|:----------|:-------------|:-------------|------:| +| Train your tokenizer | How to train and use your very own tokenizer || | +| Train your language model | How to easily start using transformers || | +| How to fine-tune a model on text classification| Show how to preprocess the data and fine-tune a pretrained model on any GLUE task. | | | +| How to fine-tune a model on language modeling| Show how to preprocess the data and fine-tune a pretrained model on a causal or masked LM task. | | | +| How to fine-tune a model on token classification| Show how to preprocess the data and fine-tune a pretrained model on a token classification task (NER, PoS). | | | +| How to fine-tune a model on question answering| Show how to preprocess the data and fine-tune a pretrained model on SQUAD. | | | +| How to fine-tune a model on multiple choice| Show how to preprocess the data and fine-tune a pretrained model on SWAG. | | | +| How to fine-tune a model on translation| Show how to preprocess the data and fine-tune a pretrained model on WMT. | | | +| How to fine-tune a model on summarization| Show how to preprocess the data and fine-tune a pretrained model on XSUM. 
| | | +| How to train a language model from scratch| Highlight all the steps to effectively train Transformer model on custom data | | | +| How to generate text| How to use different decoding methods for language generation with transformers | | | +| How to generate text (with constraints)| How to guide language generation with user-provided constraints | | | +| Reformer| How Reformer pushes the limits of language modeling | | | +Computer Vision[[pytorch-cv]] +| Notebook | Description | | | +|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------:| +| How to fine-tune a model on image classification (Torchvision) | Show how to preprocess the data using Torchvision and fine-tune any pretrained Vision model on Image Classification | | | +| How to fine-tune a model on image classification (Albumentations) | Show how to preprocess the data using Albumentations and fine-tune any pretrained Vision model on Image Classification | | | +| How to fine-tune a model on image classification (Kornia) | Show how to preprocess the data using Kornia and fine-tune any pretrained Vision model on Image Classification | | | +| How to perform zero-shot object detection with OWL-ViT | Show how to perform zero-shot object detection on images with text queries | | | +| How to fine-tune an image captioning model | Show how to fine-tune BLIP for image captioning on a custom dataset | | | +| How to build an image similarity system with Transformers | Show how to build an image similarity system | | | +| How to fine-tune a SegFormer model on semantic segmentation | Show how to preprocess the data and fine-tune a pretrained SegFormer model on Semantic Segmentation | | | +| How to fine-tune a VideoMAE model on video classification | Show how to preprocess the data and fine-tune a pretrained VideoMAE model on Video Classification | | | +Audio[[pytorch-audio]] +| Notebook | Description | | | +|:----------|:-------------|:-------------|------:| +| How to fine-tune a speech recognition model in English| Show how to preprocess the data and fine-tune a pretrained Speech model on TIMIT | | | +| How to fine-tune a speech recognition model in any language| Show how to preprocess the data and fine-tune a multi-lingually pretrained speech model on Common Voice | | | +| How to fine-tune a model on audio classification| Show how to preprocess the data and fine-tune a pretrained Speech model on Keyword Spotting | | | +Biological Sequences[[pytorch-bio]] +| Notebook | Description | | | +|:----------|:----------------------------------------------------------------------------------------|:-------------|------:| +| How to fine-tune a pre-trained protein model | See how to tokenize proteins and fine-tune a large pre-trained protein "language" model | | | +| How to generate protein folds | See how to go from protein sequence to a full protein model and PDB file | | | +| How to fine-tune a Nucleotide Transformer model | See how to tokenize DNA and fine-tune a large pre-trained DNA "language" model | | | +| Fine-tune a Nucleotide Transformer model with LoRA | Train even larger DNA models in a 
memory-efficient way | | | +Other modalities[[pytorch-other]] +| Notebook | Description | | | +|:----------|:----------------------------------------------------------------------------------------|:-------------|------:| +| Probabilistic Time Series Forecasting | See how to train Time Series Transformer on a custom dataset | | | +Utility notebooks[[pytorch-utility]] +| Notebook | Description | | | +|:----------|:-------------|:-------------|------:| +| How to export model to ONNX| Highlight how to export and run inference workloads through ONNX | | | +| How to use Benchmarks| How to benchmark models with transformers | | | +TensorFlow Examples +Natural Language Processing[[tensorflow-nlp]] +| Notebook | Description | | | +|:----------|:-------------|:-------------|------:| +| Train your tokenizer | How to train and use your very own tokenizer || | +| Train your language model | How to easily start using transformers || | +| How to fine-tune a model on text classification| Show how to preprocess the data and fine-tune a pretrained model on any GLUE task. | | | +| How to fine-tune a model on language modeling| Show how to preprocess the data and fine-tune a pretrained model on a causal or masked LM task. | | | +| How to fine-tune a model on token classification| Show how to preprocess the data and fine-tune a pretrained model on a token classification task (NER, PoS). | | | +| How to fine-tune a model on question answering| Show how to preprocess the data and fine-tune a pretrained model on SQUAD. | | | +| How to fine-tune a model on multiple choice| Show how to preprocess the data and fine-tune a pretrained model on SWAG. | | | +| How to fine-tune a model on translation| Show how to preprocess the data and fine-tune a pretrained model on WMT. | | | +| How to fine-tune a model on summarization| Show how to preprocess the data and fine-tune a pretrained model on XSUM. | | | +Computer Vision[[tensorflow-cv]] +| Notebook | Description | | | +|:---------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------|:-------------|------:| +| How to fine-tune a model on image classification | Show how to preprocess the data and fine-tune any pretrained Vision model on Image Classification | | | +| How to fine-tune a SegFormer model on semantic segmentation | Show how to preprocess the data and fine-tune a pretrained SegFormer model on Semantic Segmentation | | | +Biological Sequences[[tensorflow-bio]] +| Notebook | Description | | | +|:----------|:-------------|:-------------|------:| +| How to fine-tune a pre-trained protein model | See how to tokenize proteins and fine-tune a large pre-trained protein "language" model | | | +Utility notebooks[[tensorflow-utility]] +| Notebook | Description | | | +|:----------|:-------------|:-------------|------:| +| How to train TF/Keras models on TPU | See how to train at high speed on Google's TPU hardware | | | +Optimum notebooks +🤗 Optimum is an extension of 🤗 Transformers, providing a set of performance optimization tools enabling maximum efficiency to train and run models on targeted hardwares. +| Notebook | Description | | | +|:----------|:-------------|:-------------|------:| +| How to quantize a model with ONNX Runtime for text classification| Show how to apply static and dynamic quantization on a model using ONNX Runtime for any GLUE task. 
| | | +| How to quantize a model with Intel Neural Compressor for text classification| Show how to apply static, dynamic and aware training quantization on a model using Intel Neural Compressor (INC) for any GLUE task. | | | +| How to fine-tune a model on text classification with ONNX Runtime| Show how to preprocess the data and fine-tune a model on any GLUE task using ONNX Runtime. | | | +| How to fine-tune a model on summarization with ONNX Runtime| Show how to preprocess the data and fine-tune a model on XSUM using ONNX Runtime. | | | +Community notebooks: +More notebooks developed by the community are available here. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_pad_truncation.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_pad_truncation.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..c2fdb0444d477c8ca7aa30ac91c5ff1adb71c2a8 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_pad_truncation.txt_chunk_0.txt @@ -0,0 +1,4 @@ +Padding and truncation +Batched inputs are often different lengths, so they can't be converted to fixed-size tensors. Padding and truncation are strategies for dealing with this problem, to create rectangular tensors from batches of varying lengths. Padding adds a special padding token to ensure shorter sequences will have the same length as either the longest sequence in a batch or the maximum length accepted by the model. Truncation works in the other direction by truncating long sequences. +In most cases, padding your batch to the length of the longest sequence and truncating to the maximum length a model can accept works pretty well. However, the API supports more strategies if you need them. The three arguments you need to are: padding, truncation and max_length. +The padding argument controls padding. It can be a boolean or a string: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_pad_truncation.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_pad_truncation.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..92388069802581ca3f3049fb9d07f3e859b64875 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_pad_truncation.txt_chunk_1.txt @@ -0,0 +1,7 @@ +True or 'longest': pad to the longest sequence in the batch (no padding is applied if you only provide + a single sequence). +'max_length': pad to a length specified by the max_length argument or the maximum length accepted + by the model if no max_length is provided (max_length=None). Padding will still be applied if you only provide a single sequence. +False or 'do_not_pad': no padding is applied. This is the default behavior. + +The truncation argument controls truncation. It can be a boolean or a string: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_pad_truncation.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_pad_truncation.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..1bda640047fd22c3afe29435cac50c4b8c91491c --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_pad_truncation.txt_chunk_2.txt @@ -0,0 +1,11 @@ +True or 'longest_first': truncate to a maximum length specified by the max_length argument or + the maximum length accepted by the model if no max_length is provided (max_length=None). This will + truncate token by token, removing a token from the longest sequence in the pair until the proper length is + reached. 
+'only_second': truncate to a maximum length specified by the max_length argument or the maximum + length accepted by the model if no max_length is provided (max_length=None). This will only truncate + the second sentence of a pair if a pair of sequences (or a batch of pairs of sequences) is provided. +'only_first': truncate to a maximum length specified by the max_length argument or the maximum + length accepted by the model if no max_length is provided (max_length=None). This will only truncate + the first sentence of a pair if a pair of sequences (or a batch of pairs of sequences) is provided. +False or 'do_not_truncate': no truncation is applied. This is the default behavior. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_pad_truncation.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_pad_truncation.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..f26429e78be0a4a35e3b1b147f2fa0c644164eba --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_pad_truncation.txt_chunk_3.txt @@ -0,0 +1,25 @@ +The max_length argument controls the length of the padding and truncation. It can be an integer or None, in which case it will default to the maximum length the model can accept. If the model has no specific maximum input length, truncation or padding to max_length is deactivated. +The following table summarizes the recommended way to setup padding and truncation. If you use pairs of input sequences in any of the following examples, you can replace truncation=True by a STRATEGY selected in +['only_first', 'only_second', 'longest_first'], i.e. truncation='only_second' or truncation='longest_first' to control how both sequences in the pair are truncated as detailed before. +| Truncation | Padding | Instruction | +|--------------------------------------|-----------------------------------|---------------------------------------------------------------------------------------------| +| no truncation | no padding | tokenizer(batch_sentences) | +| | padding to max sequence in batch | tokenizer(batch_sentences, padding=True) or | +| | | tokenizer(batch_sentences, padding='longest') | +| | padding to max model input length | tokenizer(batch_sentences, padding='max_length') | +| | padding to specific length | tokenizer(batch_sentences, padding='max_length', max_length=42) | +| | padding to a multiple of a value | tokenizer(batch_sentences, padding=True, pad_to_multiple_of=8) | +| truncation to max model input length | no padding | tokenizer(batch_sentences, truncation=True) or | +| | | tokenizer(batch_sentences, truncation=STRATEGY) | +| | padding to max sequence in batch | tokenizer(batch_sentences, padding=True, truncation=True) or | +| | | tokenizer(batch_sentences, padding=True, truncation=STRATEGY) | +| | padding to max model input length | tokenizer(batch_sentences, padding='max_length', truncation=True) or | +| | | tokenizer(batch_sentences, padding='max_length', truncation=STRATEGY) | +| | padding to specific length | Not possible | +| truncation to specific length | no padding | tokenizer(batch_sentences, truncation=True, max_length=42) or | +| | | tokenizer(batch_sentences, truncation=STRATEGY, max_length=42) | +| | padding to max sequence in batch | tokenizer(batch_sentences, padding=True, truncation=True, max_length=42) or | +| | | tokenizer(batch_sentences, padding=True, truncation=STRATEGY, max_length=42) | +| | padding to max model input length | Not possible | +| | padding to specific length | tokenizer(batch_sentences, 
padding='max_length', truncation=True, max_length=42) or | +| | | tokenizer(batch_sentences, padding='max_length', truncation=STRATEGY, max_length=42) | \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_hardware.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_hardware.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..ad17b78e6df4b8d366aa3054a2d1cf9657f8d3ba --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_hardware.txt_chunk_0.txt @@ -0,0 +1,9 @@ +Custom hardware for training +The hardware you use to run model training and inference can have a big effect on performance. For a deep dive into GPUs make sure to check out Tim Dettmer's excellent blog post. +Let's have a look at some practical advice for GPU setups. +GPU +When you train bigger models you have essentially three options: + +bigger GPUs +more GPUs +more CPU and NVMe (offloaded to by DeepSpeed-Infinity) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_hardware.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_hardware.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..eaec80bc50e6908f80048e0170400588cfeb4ff1 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_hardware.txt_chunk_1.txt @@ -0,0 +1,16 @@ +Let's start at the case where you have a single GPU. +Power and Cooling +If you bought an expensive high end GPU make sure you give it the correct power and sufficient cooling. +Power: +Some high end consumer GPU cards have 2 and sometimes 3 PCI-E 8-Pin power sockets. Make sure you have as many independent 12V PCI-E 8-Pin cables plugged into the card as there are sockets. Do not use the 2 splits at one end of the same cable (also known as pigtail cable). That is if you have 2 sockets on the GPU, you want 2 PCI-E 8-Pin cables going from your PSU to the card and not one that has 2 PCI-E 8-Pin connectors at the end! You won't get the full performance out of your card otherwise. +Each PCI-E 8-Pin power cable needs to be plugged into a 12V rail on the PSU side and can supply up to 150W of power. +Some other cards may use a PCI-E 12-Pin connectors, and these can deliver up to 500-600W of power. +Low end cards may use 6-Pin connectors, which supply up to 75W of power. +Additionally you want the high-end PSU that has stable voltage. Some lower quality ones may not give the card the stable voltage it needs to function at its peak. +And of course the PSU needs to have enough unused Watts to power the card. +Cooling: +When a GPU gets overheated it will start throttling down and will not deliver full performance and it can even shutdown if it gets too hot. +It's hard to tell the exact best temperature to strive for when a GPU is heavily loaded, but probably anything under +80C is good, but lower is better - perhaps 70-75C is an excellent range to be in. The throttling down is likely to start at around 84-90C. But other than throttling performance a prolonged very high temperature is likely to reduce the lifespan of a GPU. +Next let's have a look at one of the most important aspects when having multiple GPUs: connectivity. +Multi-GPU Connectivity +If you use multiple GPUs the way cards are inter-connected can have a huge impact on the total training time. 
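To make the padding and truncation table above concrete, here is a minimal sketch; the checkpoint name is only an example:

```python
# Illustrative only: a few of the padding/truncation combinations from the table above.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")  # example checkpoint
batch_sentences = ["A short sentence.", "A noticeably longer sentence that needs more tokens."]

# Pad to the longest sequence in the batch and truncate to the model's maximum input length.
batch = tokenizer(batch_sentences, padding=True, truncation=True)

# Pad and truncate to a specific length instead.
batch_fixed = tokenizer(batch_sentences, padding="max_length", truncation=True, max_length=42)
```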
If the GPUs are on the same physical node, you can run: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_hardware.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_hardware.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..5769d1bb8d46c9ce7b79c815de2b03371ef43f3c --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_hardware.txt_chunk_2.txt @@ -0,0 +1,23 @@ +nvidia-smi topo -m +and it will tell you how the GPUs are inter-connected. On a machine with dual-GPU and which are connected with NVLink, you will most likely see something like: +GPU0 GPU1 CPU Affinity NUMA Affinity +GPU0 X NV2 0-23 N/A +GPU1 NV2 X 0-23 N/A +on a different machine w/o NVLink we may see: +GPU0 GPU1 CPU Affinity NUMA Affinity +GPU0 X PHB 0-11 N/A +GPU1 PHB X 0-11 N/A +The report includes this legend: +X = Self + SYS = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI) + NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node + PHB = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU) + PXB = Connection traversing multiple PCIe bridges (without traversing the PCIe Host Bridge) + PIX = Connection traversing at most a single PCIe bridge + NV# = Connection traversing a bonded set of # NVLinks +So the first report NV2 tells us the GPUs are interconnected with 2 NVLinks, and the second report PHB we have a typical consumer-level PCIe+Bridge setup. +Check what type of connectivity you have on your setup. Some of these will make the communication between cards faster (e.g. NVLink), others slower (e.g. PHB). +Depending on the type of scalability solution used, the connectivity speed could have a major or a minor impact. If the GPUs need to sync rarely, as in DDP, the impact of a slower connection will be less significant. If the GPUs need to send messages to each other often, as in ZeRO-DP, then faster connectivity becomes super important to achieve faster training. +NVlink +NVLink is a wire-based serial multi-lane near-range communications link developed by Nvidia. +Each new generation provides a faster bandwidth, e.g. here is a quote from Nvidia Ampere GA102 GPU Architecture: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_hardware.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_hardware.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..4cb228f471a43aabd176035ad041ca4da871f685 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_hardware.txt_chunk_3.txt @@ -0,0 +1,6 @@ +Third-Generation NVLink® +GA102 GPUs utilize NVIDIA’s third-generation NVLink interface, which includes four x4 links, +with each link providing 14.0625 GB/sec bandwidth in each direction between two GPUs. Four +links provide 56.25 GB/sec bandwidth in each direction, and 112.5 GB/sec total bandwidth +between two GPUs. Two RTX 3090 GPUs can be connected together for SLI using NVLink. +(Note that 3-Way and 4-Way SLI configurations are not supported.) 
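As a small programmatic complement to nvidia-smi topo -m, you can also ask PyTorch whether two GPUs can reach each other directly; this quick check is not part of the original guide and assumes a CUDA build of PyTorch:

```python
# Illustrative check: is direct peer-to-peer access (e.g. over NVLink or PCIe) available between GPU 0 and GPU 1?
import torch

if torch.cuda.device_count() >= 2:
    print(torch.cuda.can_device_access_peer(0, 1))
```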
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_hardware.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_hardware.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..7a8fac217be8c361d231fe60f7152b9c8dc255e1 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_hardware.txt_chunk_4.txt @@ -0,0 +1,22 @@ +So the higher X you get in the report of NVX in the output of nvidia-smi topo -m the better. The generation will depend on your GPU architecture. +Let's compare the execution of a openai-community/gpt2 language model training over a small sample of wikitext. +The results are: +| NVlink | Time | +| ----- | ---: | +| Y | 101s | +| N | 131s | +You can see that NVLink completes the training ~23% faster. In the second benchmark we use NCCL_P2P_DISABLE=1 to tell the GPUs not to use NVLink. +Here is the full benchmark code and outputs: +```bash +DDP w/ NVLink +rm -r /tmp/test-clm; CUDA_VISIBLE_DEVICES=0,1 torchrun \ +--nproc_per_node 2 examples/pytorch/language-modeling/run_clm.py --model_name_or_path openai-community/gpt2 \ +--dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 --do_train \ +--output_dir /tmp/test-clm --per_device_train_batch_size 4 --max_steps 200 +{'train_runtime': 101.9003, 'train_samples_per_second': 1.963, 'epoch': 0.69} +DDP w/o NVLink +rm -r /tmp/test-clm; CUDA_VISIBLE_DEVICES=0,1 NCCL_P2P_DISABLE=1 torchrun \ +--nproc_per_node 2 examples/pytorch/language-modeling/run_clm.py --model_name_or_path openai-community/gpt2 \ +--dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 --do_train +--output_dir /tmp/test-clm --per_device_train_batch_size 4 --max_steps 200 +{'train_runtime': 131.4367, 'train_samples_per_second': 1.522, 'epoch': 0.69} \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_hardware.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_hardware.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..9ab2bf126f733510695c4bec332f736d1ee72984 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_hardware.txt_chunk_5.txt @@ -0,0 +1,2 @@ +Hardware: 2x TITAN RTX 24GB each + NVlink with 2 NVLinks (NV2 in nvidia-smi topo -m) +Software: pytorch-1.8-to-be + cuda-11.0 / transformers==4.3.0.dev0 \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_cpu.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_cpu.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..33e4551573eaea272c91eeacb9c57de3e19ca465 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_cpu.txt_chunk_0.txt @@ -0,0 +1,5 @@ +CPU inference +With some optimizations, it is possible to efficiently run large model inference on a CPU. One of these optimization techniques involves compiling the PyTorch code into an intermediate format for high-performance environments like C++. The other technique fuses multiple operations into one kernel to reduce the overhead of running each operation separately. +You'll learn how to use BetterTransformer for faster inference, and how to convert your PyTorch code to TorchScript. If you're using an Intel CPU, you can also use graph optimizations from Intel Extension for PyTorch to boost inference speed even more. Finally, learn how to use 🤗 Optimum to accelerate inference with ONNX Runtime or OpenVINO (if you're using an Intel CPU). 
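As a rough illustration of the "compile to an intermediate format" idea mentioned above, tracing a model to TorchScript directly (outside the Trainer flags shown later) could look like the sketch below; the checkpoint is only an example:

```python
# Sketch: export a model to TorchScript by tracing it with example inputs.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

name = "distilbert/distilbert-base-uncased-finetuned-sst-2-english"  # example checkpoint
tokenizer = AutoTokenizer.from_pretrained(name)
# torchscript=True makes the model return tuples, which torch.jit.trace can handle.
model = AutoModelForSequenceClassification.from_pretrained(name, torchscript=True)
model.eval()

inputs = tokenizer("This movie was great!", return_tensors="pt")
traced = torch.jit.trace(model, (inputs["input_ids"], inputs["attention_mask"]))
torch.jit.save(traced, "traced_model.pt")
```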
+BetterTransformer +BetterTransformer accelerates inference with its fastpath (native PyTorch specialized implementation of Transformer functions) execution. The two optimizations in the fastpath execution are: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_cpu.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_cpu.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..135d0ca59128e34320e87c76ffa415bca44b5d74 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_cpu.txt_chunk_1.txt @@ -0,0 +1,13 @@ +fusion, which combines multiple sequential operations into a single "kernel" to reduce the number of computation steps +skipping the inherent sparsity of padding tokens to avoid unnecessary computation with nested tensors + +BetterTransformer also converts all attention operations to use the more memory-efficient scaled dot product attention. + +BetterTransformer is not supported for all models. Check this list to see if a model supports BetterTransformer. + +Before you start, make sure you have 🤗 Optimum installed. +Enable BetterTransformer with the [PreTrainedModel.to_bettertransformer] method: + +from transformers import AutoModelForCausalLM +model = AutoModelForCausalLM.from_pretrained("bigcode/starcoder") +model.to_bettertransformer() \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_cpu.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_cpu.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..7453d2d7c2dc7e518c1731ca09a70f237761a58b --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_cpu.txt_chunk_2.txt @@ -0,0 +1,14 @@ +TorchScript +TorchScript is an intermediate PyTorch model representation that can be run in production environments where performance is important. You can train a model in PyTorch and then export it to TorchScript to free the model from Python performance constraints. PyTorch traces a model to return a [ScriptFunction] that is optimized with just-in-time compilation (JIT). Compared to the default eager mode, JIT mode in PyTorch typically yields better performance for inference using optimization techniques like operator fusion. +For a gentle introduction to TorchScript, see the Introduction to PyTorch TorchScript tutorial. +With the [Trainer] class, you can enable JIT mode for CPU inference by setting the --jit_mode_eval flag: + +python run_qa.py \ +--model_name_or_path csarron/bert-base-uncased-squad-v1 \ +--dataset_name squad \ +--do_eval \ +--max_seq_length 384 \ +--doc_stride 128 \ +--output_dir /tmp/ \ +--no_cuda \ +--jit_mode_eval \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_cpu.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_cpu.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..ec622286fe1a6084b430b6d997059586a178a381 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_cpu.txt_chunk_3.txt @@ -0,0 +1,6 @@ +For PyTorch >= 1.14.0, JIT-mode could benefit any model for prediction and evaluation since the dict input is supported in jit.trace. +For PyTorch < 1.14.0, JIT-mode could benefit a model if its forward parameter order matches the tuple input order in jit.trace, such as a question-answering model. 
If the forward parameter order does not match the tuple input order in jit.trace, like a text classification model, jit.trace will fail and we are capturing this with the exception here to make it fallback. Logging is used to notify users. + +IPEX graph optimization +Intel® Extension for PyTorch (IPEX) provides further optimizations in JIT mode for Intel CPUs, and we recommend combining it with TorchScript for even faster performance. The IPEX graph optimization fuses operations like Multi-head attention, Concat Linear, Linear + Add, Linear + Gelu, Add + LayerNorm, and more. +To take advantage of these graph optimizations, make sure you have IPEX installed: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_cpu.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_cpu.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..3c76013e81b94e9a384caf1fcad3d4aa69b1b5a2 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_cpu.txt_chunk_4.txt @@ -0,0 +1,16 @@ +pip install intel_extension_for_pytorch +Set the --use_ipex and --jit_mode_eval flags in the [Trainer] class to enable JIT mode with the graph optimizations: + +python run_qa.py \ +--model_name_or_path csarron/bert-base-uncased-squad-v1 \ +--dataset_name squad \ +--do_eval \ +--max_seq_length 384 \ +--doc_stride 128 \ +--output_dir /tmp/ \ +--no_cuda \ +--use_ipex \ +--jit_mode_eval +🤗 Optimum + +Learn more details about using ORT with 🤗 Optimum in the Optimum Inference with ONNX Runtime guide. This section only provides a brief and simple example. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_cpu.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_cpu.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..25e8e9f6697e1e56dc1876553a65757a56e031ee --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_cpu.txt_chunk_5.txt @@ -0,0 +1,11 @@ +ONNX Runtime (ORT) is a model accelerator that runs inference on CPUs by default. ORT is supported by 🤗 Optimum which can be used in 🤗 Transformers, without making too many changes to your code. You only need to replace the 🤗 Transformers AutoClass with its equivalent [~optimum.onnxruntime.ORTModel] for the task you're solving, and load a checkpoint in the ONNX format. +For example, if you're running inference on a question answering task, load the optimum/roberta-base-squad2 checkpoint which contains a model.onnx file: + +from transformers import AutoTokenizer, pipeline +from optimum.onnxruntime import ORTModelForQuestionAnswering +model = ORTModelForQuestionAnswering.from_pretrained("optimum/roberta-base-squad2") +tokenizer = AutoTokenizer.from_pretrained("deepset/roberta-base-squad2") +onnx_qa = pipeline("question-answering", model=model, tokenizer=tokenizer) +question = "What's my name?" +context = "My name is Philipp and I live in Nuremberg." 
+pred = onnx_qa(question, context) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_cpu.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_cpu.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..a8f6b087618ea71f99da3c7c8f46729dcd39e8dd --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_cpu.txt_chunk_6.txt @@ -0,0 +1 @@ +If you have an Intel CPU, take a look at 🤗 Optimum Intel which supports a variety of compression techniques (quantization, pruning, knowledge distillation) and tools for converting models to the OpenVINO format for higher performance inference. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..c6473cd4c3c653c938acfccd24654ff2bdc95e88 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_0.txt @@ -0,0 +1,10 @@ +GPU inference +GPUs are the standard choice of hardware for machine learning, unlike CPUs, because they are optimized for memory bandwidth and parallelism. To keep up with the larger sizes of modern models or to run these large models on existing and older hardware, there are several optimizations you can use to speed up GPU inference. In this guide, you'll learn how to use FlashAttention-2 (a more memory-efficient attention mechanism), BetterTransformer (a PyTorch native fastpath execution), and bitsandbytes to quantize your model to a lower precision. Finally, learn how to use 🤗 Optimum to accelerate inference with ONNX Runtime on Nvidia and AMD GPUs. + +The majority of the optimizations described here also apply to multi-GPU setups! + +FlashAttention-2 + +FlashAttention-2 is experimental and may change considerably in future versions. + +FlashAttention-2 is a faster and more efficient implementation of the standard attention mechanism that can significantly speedup inference by: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..9dbe195ab8d16ed415c61c0b3faa6dfdfb970e67 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_1.txt @@ -0,0 +1,48 @@ +additionally parallelizing the attention computation over sequence length +partitioning the work between GPU threads to reduce communication and shared memory reads/writes between them + +FlashAttention-2 is currently supported for the following architectures: +* Bark +* Bart +* Cohere +* Dbrx +* DistilBert +* Gemma +* GPT2 +* GPTBigCode +* GPTNeo +* GPTNeoX +* GPT-J +* Idefics2 +* Falcon +* JetMoe +* Jamba +* Llama +* Llava +* Llava-NeXT +* VipLlava +* VideoLlava +* M2M100 +* MBart +* Mistral +* Mixtral +* Musicgen +* MusicGen Melody +* NLLB +* OLMo +* OPT +* Phi +* Phi3 +* StableLm +* Starcoder2 +* Qwen2 +* Qwen2MoE +* Whisper +* Wav2Vec2 +* Hubert +* data2vec_audio +* Sew +* UniSpeech +* unispeech_sat +You can request to add FlashAttention-2 support for another model by opening a GitHub Issue or Pull Request. +Before you begin, make sure you have FlashAttention-2 installed. 
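Tying back to the 🤗 Optimum Intel note above, a hedged sketch of running a checkpoint through OpenVINO on an Intel CPU might look like this; it assumes optimum-intel is installed with OpenVINO support (for example pip install optimum[openvino]), and the checkpoint is only an example:

```python
# Sketch: convert a Transformers checkpoint to OpenVINO on the fly and run it in a pipeline.
from optimum.intel import OVModelForSequenceClassification
from transformers import AutoTokenizer, pipeline

model_id = "distilbert/distilbert-base-uncased-finetuned-sst-2-english"  # example checkpoint
model = OVModelForSequenceClassification.from_pretrained(model_id, export=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
print(classifier("OpenVINO makes CPU inference faster."))
```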
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_10.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_10.txt new file mode 100644 index 0000000000000000000000000000000000000000..bf8218a15c9d352d21033eb74b0dce0826ed5a90 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_10.txt @@ -0,0 +1,17 @@ +BetterTransformer also converts all attention operations to use the more memory-efficient scaled dot product attention (SDPA), and it calls optimized kernels like FlashAttention under the hood. +Before you start, make sure you have 🤗 Optimum installed. +Then you can enable BetterTransformer with the [PreTrainedModel.to_bettertransformer] method: +python +model = model.to_bettertransformer() +You can return the original Transformers model with the [~PreTrainedModel.reverse_bettertransformer] method. You should use this before saving your model to use the canonical Transformers modeling: +py +model = model.reverse_bettertransformer() +model.save_pretrained("saved_model") +bitsandbytes +bitsandbytes is a quantization library that includes support for 4-bit and 8-bit quantization. Quantization reduces your model size compared to its native full precision version, making it easier to fit large models onto GPUs with limited memory. +Make sure you have bitsandbytes and 🤗 Accelerate installed: +```bash +these versions support 8-bit and 4-bit +pip install bitsandbytes>=0.39.0 accelerate>=0.20.0 +install Transformers +pip install transformers \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_11.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_11.txt new file mode 100644 index 0000000000000000000000000000000000000000..300a794ac423bb4dab2d1cffa07cb6d7a1a558ac --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_11.txt @@ -0,0 +1,15 @@ +4-bit +To load a model in 4-bit for inference, use the load_in_4bit parameter. The device_map parameter is optional, but we recommend setting it to "auto" to allow 🤗 Accelerate to automatically and efficiently allocate the model given the available resources in the environment. + +from transformers import AutoModelForCausalLM +model_name = "bigscience/bloom-2b5" +model_4bit = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", load_in_4bit=True) + +To load a model in 4-bit for inference with multiple GPUs, you can control how much GPU RAM you want to allocate to each GPU. 
For example, to distribute 600MB of memory to the first GPU and 1GB of memory to the second GPU: +py +max_memory_mapping = {0: "600MB", 1: "1GB"} +model_name = "bigscience/bloom-3b" +model_4bit = AutoModelForCausalLM.from_pretrained( + model_name, device_map="auto", load_in_4bit=True, max_memory=max_memory_mapping +) +8-bit \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_12.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_12.txt new file mode 100644 index 0000000000000000000000000000000000000000..4a62ad1893dd496c7339e016282cedaff7cc903f --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_12.txt @@ -0,0 +1,7 @@ +If you're curious and interested in learning more about the concepts underlying 8-bit quantization, read the Gentle Introduction to 8-bit Matrix Multiplication for transformers at scale using Hugging Face Transformers, Accelerate and bitsandbytes blog post. + +To load a model in 8-bit for inference, use the load_in_8bit parameter. The device_map parameter is optional, but we recommend setting it to "auto" to allow 🤗 Accelerate to automatically and efficiently allocate the model given the available resources in the environment: + +from transformers import AutoModelForCausalLM, BitsAndBytesConfig +model_name = "bigscience/bloom-2b5" +model_8bit = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=BitsAndBytesConfig(load_in_8bit=True)) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_13.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_13.txt new file mode 100644 index 0000000000000000000000000000000000000000..0204a558a7e2ee1b0fe8f2392a6853434a1bdcaa --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_13.txt @@ -0,0 +1,10 @@ +If you're loading a model in 8-bit for text generation, you should use the [~transformers.GenerationMixin.generate] method instead of the [Pipeline] function which is not optimized for 8-bit models and will be slower. Some sampling strategies, like nucleus sampling, are also not supported by the [Pipeline] for 8-bit models. You should also place all inputs on the same device as the model: + +from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig +model_name = "bigscience/bloom-2b5" +tokenizer = AutoTokenizer.from_pretrained(model_name) +model_8bit = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=BitsAndBytesConfig(load_in_8bit=True)) +prompt = "Hello, my llama is cute" +inputs = tokenizer(prompt, return_tensors="pt").to("cuda") +generated_ids = model.generate(**inputs) +outputs = tokenizer.batch_decode(generated_ids, skip_special_tokens=True) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_14.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_14.txt new file mode 100644 index 0000000000000000000000000000000000000000..fdba6baf67577386a8d10034bee461f27dfe406d --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_14.txt @@ -0,0 +1,13 @@ +To load a model in 4-bit for inference with multiple GPUs, you can control how much GPU RAM you want to allocate to each GPU. 
For example, to distribute 1GB of memory to the first GPU and 2GB of memory to the second GPU: +py +max_memory_mapping = {0: "1GB", 1: "2GB"} +model_name = "bigscience/bloom-3b" +model_8bit = AutoModelForCausalLM.from_pretrained( + model_name, device_map="auto", load_in_8bit=True, max_memory=max_memory_mapping +) + +Feel free to try running a 11 billion parameter T5 model or the 3 billion parameter BLOOM model for inference on Google Colab's free tier GPUs! + +🤗 Optimum + +Learn more details about using ORT with 🤗 Optimum in the Accelerated inference on NVIDIA GPUs and Accelerated inference on AMD GPUs guides. This section only provides a brief and simple example. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_15.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_15.txt new file mode 100644 index 0000000000000000000000000000000000000000..c0b5d4a32db5ff108c09979eca3efcc657f961c8 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_15.txt @@ -0,0 +1,2 @@ +ONNX Runtime (ORT) is a model accelerator that supports accelerated inference on Nvidia GPUs, and AMD GPUs that use ROCm stack. ORT uses optimization techniques like fusing common operations into a single node and constant folding to reduce the number of computations performed and speedup inference. ORT also places the most computationally intensive operations on the GPU and the rest on the CPU to intelligently distribute the workload between the two devices. +ORT is supported by 🤗 Optimum which can be used in 🤗 Transformers. You'll need to use an [~optimum.onnxruntime.ORTModel] for the task you're solving, and specify the provider parameter which can be set to either CUDAExecutionProvider, ROCMExecutionProvider or TensorrtExecutionProvider. If you want to load a model that was not yet exported to ONNX, you can set export=True to convert your model on-the-fly to the ONNX format: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_16.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_16.txt new file mode 100644 index 0000000000000000000000000000000000000000..c409aebff9670e8dd2b402a83980cc1e31b07213 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_16.txt @@ -0,0 +1,17 @@ +from optimum.onnxruntime import ORTModelForSequenceClassification +ort_model = ORTModelForSequenceClassification.from_pretrained( + "distilbert/distilbert-base-uncased-finetuned-sst-2-english", + export=True, + provider="CUDAExecutionProvider", +) + +Now you're free to use the model for inference: + +from optimum.pipelines import pipeline +from transformers import AutoTokenizer +tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased-finetuned-sst-2-english") +pipeline = pipeline(task="text-classification", model=ort_model, tokenizer=tokenizer, device="cuda:0") +result = pipeline("Both the music and visual were astounding, not to mention the actors performance.") + +Combine optimizations +It is often possible to combine several of the optimization techniques described above to get the best inference performance possible for your model. 
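As a quick follow-up to the bitsandbytes examples above, you can sanity-check how much memory quantization saves with get_memory_footprint(); this is an illustrative sketch and assumes enough GPU/CPU memory to hold both copies of the model:

```python
# Illustrative: compare the memory footprint of an 8-bit model against a half-precision one.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

model_name = "bigscience/bloom-2b5"
model_8bit = AutoModelForCausalLM.from_pretrained(
    model_name, device_map="auto", quantization_config=BitsAndBytesConfig(load_in_8bit=True)
)
model_fp16 = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)

print(f"8-bit: {model_8bit.get_memory_footprint() / 1e9:.2f} GB")
print(f"fp16:  {model_fp16.get_memory_footprint() / 1e9:.2f} GB")
```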
For example, you can load a model in 4-bit, and then enable BetterTransformer with FlashAttention: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_17.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_17.txt new file mode 100644 index 0000000000000000000000000000000000000000..b92b91dea10ec36854d71a67878834e857dd0038 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_17.txt @@ -0,0 +1,18 @@ +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig +load model in 4-bit +quantization_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_compute_dtype=torch.float16 +) +tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m") +model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", quantization_config=quantization_config) +enable BetterTransformer +model = model.to_bettertransformer() +input_text = "Hello my dog is cute and" +inputs = tokenizer(input_text, return_tensors="pt").to("cuda") +enable FlashAttention +with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_math=False, enable_mem_efficient=False): + outputs = model.generate(**inputs) +print(tokenizer.decode(outputs[0], skip_special_tokens=True)) +``` \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..abcc999773ee72590823a39b9d7c90c550904210 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_2.txt @@ -0,0 +1,16 @@ +pip install flash-attn --no-build-isolation +We strongly suggest referring to the detailed installation instructions to learn more about supported hardware and data types! + +FlashAttention-2 is also supported on AMD GPUs and current support is limited to Instinct MI210, Instinct MI250 and Instinct MI300. We strongly suggest using this Dockerfile to use FlashAttention-2 on AMD GPUs. + +To enable FlashAttention-2, pass the argument attn_implementation="flash_attention_2" to [~AutoModelForCausalLM.from_pretrained]: +thon +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM +model_id = "tiiuae/falcon-7b" +tokenizer = AutoTokenizer.from_pretrained(model_id) +model = AutoModelForCausalLM.from_pretrained( + model_id, + torch_dtype=torch.bfloat16, + attn_implementation="flash_attention_2", +) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..4efc50824be80b5778637157507c2a15509773a2 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_3.txt @@ -0,0 +1,22 @@ +FlashAttention-2 can only be used when the model's dtype is fp16 or bf16. Make sure to cast your model to the appropriate dtype and load them on a supported device before using FlashAttention-2. + +You can also set use_flash_attention_2=True to enable FlashAttention-2 but it is deprecated in favor of attn_implementation="flash_attention_2". + +FlashAttention-2 can be combined with other optimization techniques like quantization to further speedup inference. 
For example, you can combine FlashAttention-2 with 8-bit or 4-bit quantization: + +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM +model_id = "tiiuae/falcon-7b" +tokenizer = AutoTokenizer.from_pretrained(model_id) +load in 8bit +model = AutoModelForCausalLM.from_pretrained( + model_id, + load_in_8bit=True, + attn_implementation="flash_attention_2", +) +load in 4bit +model = AutoModelForCausalLM.from_pretrained( + model_id, + load_in_4bit=True, + attn_implementation="flash_attention_2", +) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..5375609023de67f6454d5b86dcc02441058fd6c8 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_4.txt @@ -0,0 +1,6 @@ +Expected speedups +You can benefit from considerable speedups for inference, especially for inputs with long sequences. However, since FlashAttention-2 does not support computing attention scores with padding tokens, you must manually pad/unpad the attention scores for batched inference when the sequence contains padding tokens. This leads to a significant slowdown for batched generations with padding tokens. +To overcome this, you should use FlashAttention-2 without padding tokens in the sequence during training (by packing a dataset or concatenating sequences until reaching the maximum sequence length). +For a single forward pass on tiiuae/falcon-7b with a sequence length of 4096 and various batch sizes without padding tokens, the expected speedup is: + +For a single forward pass on meta-llama/Llama-7b-hf with a sequence length of 4096 and various batch sizes without padding tokens, the expected speedup is: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..885159b21a966fb32e8e889844f0a53feddaa03a --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_5.txt @@ -0,0 +1,5 @@ +For sequences with padding tokens (generating with padding tokens), you need to unpad/pad the input sequences to correctly compute the attention scores. With a relatively small sequence length, a single forward pass creates overhead leading to a small speedup (in the example below, 30% of the input is filled with padding tokens): + +But for larger sequence lengths, you can expect even more speedup benefits: + +FlashAttention is more memory efficient, meaning you can train on much larger sequence lengths without running into out-of-memory issues. You can potentially reduce memory usage up to 20x for larger sequence lengths. Take a look at the flash-attention repository for more details. 
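The snippet above uses the shorthand load_in_8bit/load_in_4bit arguments; the same combination can also be written with a BitsAndBytesConfig, matching the quantization examples elsewhere in this guide. A minimal sketch:

```python
# Sketch: FlashAttention-2 combined with 4-bit quantization via BitsAndBytesConfig.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "tiiuae/falcon-7b"
quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config,
    attn_implementation="flash_attention_2",
)
```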
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..09e8f531d6a18bfe71367af50dac8e33929e992f --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_6.txt @@ -0,0 +1,42 @@ +PyTorch scaled dot product attention +PyTorch's torch.nn.functional.scaled_dot_product_attention (SDPA) can also call FlashAttention and memory-efficient attention kernels under the hood. SDPA support is currently being added natively in Transformers and is used by default for torch>=2.1.1 when an implementation is available. You may also set attn_implementation="sdpa" in from_pretrained() to explicitly request SDPA to be used. +For now, Transformers supports SDPA inference and training for the following architectures: +* Audio Spectrogram Transformer +* Bart +* Bert +* Cohere +* Dbrx +* DeiT +* Dpr +* Falcon +* Gemma +* GPT2 +* GPTBigCode +* JetMoe +* Jamba +* Llama +* OLMo +* PaliGemma +* Phi +* Idefics +* Whisper +* Mistral +* Mixtral +* StableLm +* Starcoder2 +* Qwen2 +* Qwen2MoE +* Musicgen +* MusicGen Melody +* ViT +* ViTHybrid +* ViTMAE +* ViTMSN +* VideoMAE +* wav2vec2 +* Hubert +* data2vec_audio +* Sew +* UniSpeech +* unispeech_sat +* YOLOS \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_7.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_7.txt new file mode 100644 index 0000000000000000000000000000000000000000..5c52631c2389d6134316d28bd8421e1fdf1fad0a --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_7.txt @@ -0,0 +1,13 @@ +FlashAttention can only be used for models with the fp16 or bf16 torch type, so make sure to cast your model to the appropriate type first. The memory-efficient attention backend is able to handle fp32 models. + +SDPA does not support certain sets of attention parameters, such as head_mask and output_attentions=True. +In that case, you should see a warning message and we will fall back to the (slower) eager implementation. 
+ +By default, SDPA selects the most performant kernel available but you can check whether a backend is available in a given setting (hardware, problem size) with torch.backends.cuda.sdp_kernel as a context manager: + +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer +tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m") +model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", torch_dtype=torch.float16).to("cuda") +input_text = "Hello my dog is cute and" +inputs = tokenizer(input_text, return_tensors="pt").to("cuda") \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_8.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_8.txt new file mode 100644 index 0000000000000000000000000000000000000000..2e9a961bf8ad0a9da202b5bea8474f5f006f8019 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_8.txt @@ -0,0 +1,14 @@ +with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_math=False, enable_mem_efficient=False): + outputs = model.generate(**inputs) + +print(tokenizer.decode(outputs[0], skip_special_tokens=True)) + +If you see a bug with the traceback below, try using the nightly version of PyTorch which may have broader coverage for FlashAttention: +```bash +RuntimeError: No available kernel. Aborting execution. +install PyTorch nightly +pip3 install -U --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu118 + +BetterTransformer + +Some BetterTransformer features are being upstreamed to Transformers with default support for native torch.nn.scaled_dot_product_attention. BetterTransformer still has a wider coverage than the Transformers SDPA integration, but you can expect more and more architectures to natively support SDPA in Transformers. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_9.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_9.txt new file mode 100644 index 0000000000000000000000000000000000000000..4875fff8124ded960889e093adaf4862ba71f036 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_infer_gpu_one.txt_chunk_9.txt @@ -0,0 +1,6 @@ +Check out our benchmarks with BetterTransformer and scaled dot product attention in the Out of the box acceleration and memory savings of 🤗 decoder models with PyTorch 2.0 and learn more about the fastpath execution in the BetterTransformer blog post. + +BetterTransformer accelerates inference with its fastpath (native PyTorch specialized implementation of Transformer functions) execution. The two optimizations in the fastpath execution are: + +fusion, which combines multiple sequential operations into a single "kernel" to reduce the number of computation steps +skipping the inherent sparsity of padding tokens to avoid unnecessary computation with nested tensors \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_torch_compile.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_torch_compile.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..36fc67cf4e5bb0e6f9efd9e4c6da3efa722a0201 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_torch_compile.txt_chunk_0.txt @@ -0,0 +1,10 @@ +Optimize inference using torch.compile() +This guide aims to provide a benchmark on the inference speed-ups introduced with torch.compile() for computer vision models in 🤗 Transformers. 
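Before moving on to the torch.compile benchmarks, one more note on the SDPA discussion above: the guide mentions that attn_implementation="sdpa" can be requested explicitly in from_pretrained(). A minimal sketch of that call:

```python
# Sketch: explicitly request PyTorch scaled dot product attention (SDPA).
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "facebook/opt-350m",
    torch_dtype=torch.float16,
    attn_implementation="sdpa",
).to("cuda")
```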
+Benefits of torch.compile +Depending on the model and the GPU, torch.compile() yields up to 30% speed-up during inference. To use torch.compile(), simply install any version of torch above 2.0. +Compiling a model takes time, so it's useful if you are compiling the model only once instead of every time you infer. +To compile any computer vision model of your choice, call torch.compile() on the model as shown below: + +from transformers import AutoModelForImageClassification +model = AutoModelForImageClassification.from_pretrained(MODEL_ID).to("cuda") ++ model = torch.compile(model) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_torch_compile.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_torch_compile.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..a8413bb5976cd055df3bef80976903789b54804d --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_torch_compile.txt_chunk_1.txt @@ -0,0 +1,19 @@ +compile() comes with multiple modes for compiling, which essentially differ in compilation time and inference overhead. max-autotune takes longer than reduce-overhead but results in faster inference. Default mode is fastest for compilation but is not as efficient compared to reduce-overhead for inference time. In this guide, we used the default mode. You can learn more about it here. +We benchmarked torch.compile with different computer vision models, tasks, types of hardware, and batch sizes on torch version 2.0.1. +Benchmarking code +Below you can find the benchmarking code for each task. We warm up the GPU before inference and take the mean time of 300 inferences, using the same image each time. +Image Classification with ViT +thon +import torch +from PIL import Image +import requests +import numpy as np +from transformers import AutoImageProcessor, AutoModelForImageClassification +url = 'http://images.cocodataset.org/val2017/000000039769.jpg' +image = Image.open(requests.get(url, stream=True).raw) +processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224") +model = AutoModelForImageClassification.from_pretrained("google/vit-base-patch16-224").to("cuda") +model = torch.compile(model) +processed_input = processor(image, return_tensors='pt').to(device="cuda") +with torch.no_grad(): + _ = model(**processed_input) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_torch_compile.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_torch_compile.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..78400349a9daf76e450daffb6f79d11d806d1e5f --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_torch_compile.txt_chunk_2.txt @@ -0,0 +1,20 @@ +Object Detection with DETR +thon +from transformers import AutoImageProcessor, AutoModelForObjectDetection +processor = AutoImageProcessor.from_pretrained("facebook/detr-resnet-50") +model = AutoModelForObjectDetection.from_pretrained("facebook/detr-resnet-50").to("cuda") +model = torch.compile(model) +texts = ["a photo of a cat", "a photo of a dog"] +inputs = processor(text=texts, images=image, return_tensors="pt").to("cuda") +with torch.no_grad(): + _ = model(**inputs) + +Image Segmentation with Segformer +thon +from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation +processor = SegformerImageProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512") +model = 
SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512").to("cuda") +model = torch.compile(model) +seg_inputs = processor(images=image, return_tensors="pt").to("cuda") +with torch.no_grad(): + _ = model(**seg_inputs) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_torch_compile.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_torch_compile.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..e5c40d41802a5d3395792c2f16ea4c4c5fc8e1e1 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_torch_compile.txt_chunk_3.txt @@ -0,0 +1,16 @@ +Below you can find the list of the models we benchmarked. +Image Classification +- google/vit-base-patch16-224 +- microsoft/beit-base-patch16-224-pt22k-ft22k +- facebook/convnext-large-224 +- microsoft/resnet-50 +Image Segmentation +- nvidia/segformer-b0-finetuned-ade-512-512 +- facebook/mask2former-swin-tiny-coco-panoptic +- facebook/maskformer-swin-base-ade +- google/deeplabv3_mobilenet_v2_1.0_513 +Object Detection +- google/owlvit-base-patch32 +- facebook/detr-resnet-101 +- microsoft/conditional-detr-resnet-50 +Below you can find visualization of inference durations with and without torch.compile() and percentage improvements for each model in different hardware and batch sizes. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_torch_compile.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_torch_compile.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..3e7eafa6f3ba7859eb08a1dbe7dc8e98a36cd46c --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_torch_compile.txt_chunk_4.txt @@ -0,0 +1,183 @@ +Below you can find inference durations in milliseconds for each model with and without compile(). Note that OwlViT results in OOM in larger batch sizes. 
+A100 (batch size: 1) +| Task/Model | torch 2.0 - no compile | torch 2.0 - compile | +|:---:|:---:|:---:| +| Image Classification/ViT | 9.325 | 7.584 | +| Image Segmentation/Segformer | 11.759 | 10.500 | +| Object Detection/OwlViT | 24.978 | 18.420 | +| Image Classification/BeiT | 11.282 | 8.448 | +| Object Detection/DETR | 34.619 | 19.040 | +| Image Classification/ConvNeXT | 10.410 | 10.208 | +| Image Classification/ResNet | 6.531 | 4.124 | +| Image Segmentation/Mask2former | 60.188 | 49.117 | +| Image Segmentation/Maskformer | 75.764 | 59.487 | +| Image Segmentation/MobileNet | 8.583 | 3.974 | +| Object Detection/Resnet-101 | 36.276 | 18.197 | +| Object Detection/Conditional-DETR | 31.219 | 17.993 | +A100 (batch size: 4) +| Task/Model | torch 2.0 - no compile | torch 2.0 - compile | +|:---:|:---:|:---:| +| Image Classification/ViT | 14.832 | 14.499 | +| Image Segmentation/Segformer | 18.838 | 16.476 | +| Image Classification/BeiT | 13.205 | 13.048 | +| Object Detection/DETR | 48.657 | 32.418| +| Image Classification/ConvNeXT | 22.940 | 21.631 | +| Image Classification/ResNet | 6.657 | 4.268 | +| Image Segmentation/Mask2former | 74.277 | 61.781 | +| Image Segmentation/Maskformer | 180.700 | 159.116 | +| Image Segmentation/MobileNet | 14.174 | 8.515 | +| Object Detection/Resnet-101 | 68.101 | 44.998 | +| Object Detection/Conditional-DETR | 56.470 | 35.552 | +A100 (batch size: 16) +| Task/Model | torch 2.0 - no compile | torch 2.0 - compile | +|:---:|:---:|:---:| +| Image Classification/ViT | 40.944 | 40.010 | +| Image Segmentation/Segformer | 37.005 | 31.144 | +| Image Classification/BeiT | 41.854 | 41.048 | +| Object Detection/DETR | 164.382 | 161.902 | +| Image Classification/ConvNeXT | 82.258 | 75.561 | +| Image Classification/ResNet | 7.018 | 5.024 | +| Image Segmentation/Mask2former | 178.945 | 154.814 | +| Image Segmentation/Maskformer | 638.570 | 579.826 | +| Image Segmentation/MobileNet | 51.693 | 30.310 | +| Object Detection/Resnet-101 | 232.887 | 155.021 | +| Object Detection/Conditional-DETR | 180.491 | 124.032 | +V100 (batch size: 1) +| Task/Model | torch 2.0 - no compile | torch 2.0 - compile | +|:---:|:---:|:---:| +| Image Classification/ViT | 10.495 | 6.00 | +| Image Segmentation/Segformer | 13.321 | 5.862 | +| Object Detection/OwlViT | 25.769 | 22.395 | +| Image Classification/BeiT | 11.347 | 7.234 | +| Object Detection/DETR | 33.951 | 19.388 | +| Image Classification/ConvNeXT | 11.623 | 10.412 | +| Image Classification/ResNet | 6.484 | 3.820 | +| Image Segmentation/Mask2former | 64.640 | 49.873 | +| Image Segmentation/Maskformer | 95.532 | 72.207 | +| Image Segmentation/MobileNet | 9.217 | 4.753 | +| Object Detection/Resnet-101 | 52.818 | 28.367 | +| Object Detection/Conditional-DETR | 39.512 | 20.816 | +V100 (batch size: 4) +| Task/Model | torch 2.0 - no compile | torch 2.0 - compile | +|:---:|:---:|:---:| +| Image Classification/ViT | 15.181 | 14.501 | +| Image Segmentation/Segformer | 16.787 | 16.188 | +| Image Classification/BeiT | 15.171 | 14.753 | +| Object Detection/DETR | 88.529 | 64.195 | +| Image Classification/ConvNeXT | 29.574 | 27.085 | +| Image Classification/ResNet | 6.109 | 4.731 | +| Image Segmentation/Mask2former | 90.402 | 76.926 | +| Image Segmentation/Maskformer | 234.261 | 205.456 | +| Image Segmentation/MobileNet | 24.623 | 14.816 | +| Object Detection/Resnet-101 | 134.672 | 101.304 | +| Object Detection/Conditional-DETR | 97.464 | 69.739 | +V100 (batch size: 16) +| Task/Model | torch 2.0 - no compile | torch 2.0 - compile | +|:---:|:---:|:---:| +| 
Image Classification/ViT | 52.209 | 51.633 | +| Image Segmentation/Segformer | 61.013 | 55.499 | +| Image Classification/BeiT | 53.938 | 53.581 | +| Object Detection/DETR | OOM | OOM | +| Image Classification/ConvNeXT | 109.682 | 100.771 | +| Image Classification/ResNet | 14.857 | 12.089 | +| Image Segmentation/Mask2former | 249.605 | 222.801 | +| Image Segmentation/Maskformer | 831.142 | 743.645 | +| Image Segmentation/MobileNet | 93.129 | 55.365 | +| Object Detection/Resnet-101 | 482.425 | 361.843 | +| Object Detection/Conditional-DETR | 344.661 | 255.298 | +T4 (batch size: 1) +| Task/Model | torch 2.0 - no compile | torch 2.0 - compile | +|:---:|:---:|:---:| +| Image Classification/ViT | 16.520 | 15.786 | +| Image Segmentation/Segformer | 16.116 | 14.205 | +| Object Detection/OwlViT | 53.634 | 51.105 | +| Image Classification/BeiT | 16.464 | 15.710 | +| Object Detection/DETR | 73.100 | 53.99 | +| Image Classification/ConvNeXT | 32.932 | 30.845 | +| Image Classification/ResNet | 6.031 | 4.321 | +| Image Segmentation/Mask2former | 79.192 | 66.815 | +| Image Segmentation/Maskformer | 200.026 | 188.268 | +| Image Segmentation/MobileNet | 18.908 | 11.997 | +| Object Detection/Resnet-101 | 106.622 | 82.566 | +| Object Detection/Conditional-DETR | 77.594 | 56.984 | +T4 (batch size: 4) +| Task/Model | torch 2.0 - no compile | torch 2.0 - compile | +|:---:|:---:|:---:| +| Image Classification/ViT | 43.653 | 43.626 | +| Image Segmentation/Segformer | 45.327 | 42.445 | +| Image Classification/BeiT | 52.007 | 51.354 | +| Object Detection/DETR | 277.850 | 268.003 | +| Image Classification/ConvNeXT | 119.259 | 105.580 | +| Image Classification/ResNet | 13.039 | 11.388 | +| Image Segmentation/Mask2former | 201.540 | 184.670 | +| Image Segmentation/Maskformer | 764.052 | 711.280 | +| Image Segmentation/MobileNet | 74.289 | 48.677 | +| Object Detection/Resnet-101 | 421.859 | 357.614 | +| Object Detection/Conditional-DETR | 289.002 | 226.945 | +T4 (batch size: 16) +| Task/Model | torch 2.0 - no compile | torch 2.0 - compile | +|:---:|:---:|:---:| +| Image Classification/ViT | 163.914 | 160.907 | +| Image Segmentation/Segformer | 192.412 | 163.620 | +| Image Classification/BeiT | 188.978 | 187.976 | +| Object Detection/DETR | OOM | OOM | +| Image Classification/ConvNeXT | 422.886 | 388.078 | +| Image Classification/ResNet | 44.114 | 37.604 | +| Image Segmentation/Mask2former | 756.337 | 695.291 | +| Image Segmentation/Maskformer | 2842.940 | 2656.88 | +| Image Segmentation/MobileNet | 299.003 | 201.942 | +| Object Detection/Resnet-101 | 1619.505 | 1262.758 | +| Object Detection/Conditional-DETR | 1137.513 | 897.390| +PyTorch Nightly +We also benchmarked on PyTorch nightly (2.1.0dev, find the wheel here) and observed improvement in latency both for uncompiled and compiled models. 
+A100 +| Task/Model | Batch Size | torch 2.0 - no compile | torch 2.0 - compile | +|:---:|:---:|:---:|:---:| +| Image Classification/BeiT | Unbatched | 12.462 | 6.954 | +| Image Classification/BeiT | 4 | 14.109 | 12.851 | +| Image Classification/BeiT | 16 | 42.179 | 42.147 | +| Object Detection/DETR | Unbatched | 30.484 | 15.221 | +| Object Detection/DETR | 4 | 46.816 | 30.942 | +| Object Detection/DETR | 16 | 163.749 | 163.706 | +T4 +| Task/Model | Batch Size | torch 2.0 - no compile | torch 2.0 - compile | +|:---:|:---:|:---:|:---:| +| Image Classification/BeiT | Unbatched | 14.408 | 14.052 | +| Image Classification/BeiT | 4 | 47.381 | 46.604 | +| Image Classification/BeiT | 16 | 42.179 | 42.147 | +| Object Detection/DETR | Unbatched | 68.382 | 53.481 | +| Object Detection/DETR | 4 | 269.615 | 204.785 | +| Object Detection/DETR | 16 | OOM | OOM | +V100 +| Task/Model | Batch Size | torch 2.0 - no compile | torch 2.0 - compile | +|:---:|:---:|:---:|:---:| +| Image Classification/BeiT | Unbatched | 13.477 | 7.926 | +| Image Classification/BeiT | 4 | 15.103 | 14.378 | +| Image Classification/BeiT | 16 | 52.517 | 51.691 | +| Object Detection/DETR | Unbatched | 28.706 | 19.077 | +| Object Detection/DETR | 4 | 88.402 | 62.949| +| Object Detection/DETR | 16 | OOM | OOM | +Reduce Overhead +We benchmarked reduce-overhead compilation mode for A100 and T4 in Nightly. +A100 +| Task/Model | Batch Size | torch 2.0 - no compile | torch 2.0 - compile | +|:---:|:---:|:---:|:---:| +| Image Classification/ConvNeXT | Unbatched | 11.758 | 7.335 | +| Image Classification/ConvNeXT | 4 | 23.171 | 21.490 | +| Image Classification/ResNet | Unbatched | 7.435 | 3.801 | +| Image Classification/ResNet | 4 | 7.261 | 2.187 | +| Object Detection/Conditional-DETR | Unbatched | 32.823 | 11.627 | +| Object Detection/Conditional-DETR | 4 | 50.622 | 33.831 | +| Image Segmentation/MobileNet | Unbatched | 9.869 | 4.244 | +| Image Segmentation/MobileNet | 4 | 14.385 | 7.946 | +T4 +| Task/Model | Batch Size | torch 2.0 - no compile | torch 2.0 - compile | +|:---:|:---:|:---:|:---:| +| Image Classification/ConvNeXT | Unbatched | 32.137 | 31.84 | +| Image Classification/ConvNeXT | 4 | 120.944 | 110.209 | +| Image Classification/ResNet | Unbatched | 9.761 | 7.698 | +| Image Classification/ResNet | 4 | 15.215 | 13.871 | +| Object Detection/Conditional-DETR | Unbatched | 72.150 | 57.660 | +| Object Detection/Conditional-DETR | 4 | 301.494 | 247.543 | +| Image Segmentation/MobileNet | Unbatched | 22.266 | 19.339 | +| Image Segmentation/MobileNet | 4 | 78.311 | 50.983 | \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..027ea679797de32a8e02350093db63275786a56c --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu.txt_chunk_0.txt @@ -0,0 +1,16 @@ +Efficient Training on CPU +This guide focuses on training large models efficiently on CPU. +Mixed precision with IPEX +Mixed precision uses single (fp32) and half-precision (bf16/fp16) data types in a model to accelerate training or inference while still preserving much of the single-precision accuracy. Modern CPUs such as 3rd and 4th Gen Intel® Xeon® Scalable processors natively support bf16, so you should get more performance out of the box by enabling mixed precision training with bf16. 
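As a minimal illustration of bf16 mixed precision on the CPU backend (a sketch with a stand-in nn.Linear module, not a full training loop):

```python
import torch
from torch import nn

model = nn.Linear(256, 256)      # stand-in for a real Transformers model
x = torch.randn(8, 256)

# Auto Mixed Precision on CPU: eligible ops run in bfloat16 while
# numerically sensitive ops stay in float32.
with torch.autocast(device_type="cpu", dtype=torch.bfloat16):
    y = model(x)

print(y.dtype)  # expected: torch.bfloat16 for the Linear output
```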
+To further maximize training performance, you can use Intel® Extension for PyTorch (IPEX), which is a library built on PyTorch and adds additional CPU instruction level architecture (ISA) level support such as Intel® Advanced Vector Extensions 512 Vector Neural Network Instructions (Intel® AVX512-VNNI), and Intel® Advanced Matrix Extensions (Intel® AMX) for an extra performance boost on Intel CPUs. However, CPUs with only AVX2 (e.g., AMD or older Intel CPUs) are not guaranteed to have better performance under IPEX. +Auto Mixed Precision (AMP) for CPU backends has been enabled since PyTorch 1.10. AMP support for bf16 on CPUs and bf16 operator optimization is also supported in IPEX and partially upstreamed to the main PyTorch branch. You can get better performance and user experience with IPEX AMP. +Check more detailed information for Auto Mixed Precision. +IPEX installation: +IPEX release is following PyTorch, to install via pip: +| PyTorch Version | IPEX version | +| :---------------: | :----------: | +| 2.1.x | 2.1.100+cpu | +| 2.0.x | 2.0.100+cpu | +| 1.13 | 1.13.0+cpu | +| 1.12 | 1.12.300+cpu | +Please run pip list | grep torch to get your pytorch_version, so you can get the IPEX version_name. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..952b2ce80fd2f1e4247bfe9611b18964a165949f --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu.txt_chunk_1.txt @@ -0,0 +1,8 @@ +pip install intel_extension_for_pytorch== -f https://developer.intel.com/ipex-whl-stable-cpu +You can check the latest versions in ipex-whl-stable-cpu if needed. +Check more approaches for IPEX installation. +Usage in Trainer +To enable auto mixed precision with IPEX in Trainer, users should add use_ipex, bf16 and no_cuda in training command arguments. 
+Take an example of the use cases on Transformers question-answering + +Training with IPEX using BF16 auto mixed precision on CPU: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..2dc74b2dcb3c4f5007001b23e1ee9e1216df3de1 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu.txt_chunk_2.txt @@ -0,0 +1,27 @@ +Training with IPEX using BF16 auto mixed precision on CPU: + + python run_qa.py \ +--model_name_or_path google-bert/bert-base-uncased \ +--dataset_name squad \ +--do_train \ +--do_eval \ +--per_device_train_batch_size 12 \ +--learning_rate 3e-5 \ +--num_train_epochs 2 \ +--max_seq_length 384 \ +--doc_stride 128 \ +--output_dir /tmp/debug_squad/ \ +--use_ipex \ +--bf16 \ +--use_cpu +If you want to enable use_ipex and bf16 in your script, add these parameters to TrainingArguments like this: +diff +training_args = TrainingArguments( + output_dir=args.output_path, ++ bf16=True, ++ use_ipex=True, ++ use_cpu=True, + **kwargs +) +Practice example +Blog: Accelerating PyTorch Transformers with Intel Sapphire Rapids \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..aebd71a9d89e7736e7d9a4375f6e230961303dfc --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_0.txt @@ -0,0 +1,17 @@ +Efficient Training on Multiple CPUs +When training on a single CPU is too slow, we can use multiple CPUs. This guide focuses on PyTorch-based DDP enabling +distributed CPU training efficiently on bare metal and Kubernetes. +Intel® oneCCL Bindings for PyTorch +Intel® oneCCL (collective communications library) is a library for efficient distributed deep learning training implementing such collectives like allreduce, allgather, alltoall. For more information on oneCCL, please refer to the oneCCL documentation and oneCCL specification. +Module oneccl_bindings_for_pytorch (torch_ccl before version 1.12) implements PyTorch C10D ProcessGroup API and can be dynamically loaded as external ProcessGroup and only works on Linux platform now +Check more detailed information for oneccl_bind_pt. +Intel® oneCCL Bindings for PyTorch installation +Wheel files are available for the following Python versions: +| Extension Version | Python 3.6 | Python 3.7 | Python 3.8 | Python 3.9 | Python 3.10 | +| :---------------: | :--------: | :--------: | :--------: | :--------: | :---------: | +| 2.1.0 | | √ | √ | √ | √ | +| 2.0.0 | | √ | √ | √ | √ | +| 1.13.0 | | √ | √ | √ | √ | +| 1.12.100 | | √ | √ | √ | √ | +| 1.12.0 | | √ | √ | √ | √ | +Please run pip list | grep torch to get your pytorch_version. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..b8fa9d9248968d237095aff01e7f47b9dcc9d20f --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_1.txt @@ -0,0 +1,12 @@ +pip install oneccl_bind_pt=={pytorch_version} -f https://developer.intel.com/ipex-whl-stable-cpu +where {pytorch_version} should be your PyTorch version, for instance 2.1.0. 
+Check more approaches for oneccl_bind_pt installation. +Versions of oneCCL and PyTorch must match. + +oneccl_bindings_for_pytorch 1.12.0 prebuilt wheel does not work with PyTorch 1.12.1 (it is for PyTorch 1.12.0) +PyTorch 1.12.1 should work with oneccl_bindings_for_pytorch 1.12.100 + +Intel® MPI library +Use this standards-based MPI implementation to deliver flexible, efficient, scalable cluster messaging on Intel® architecture. This component is part of the Intel® oneAPI HPC Toolkit. +oneccl_bindings_for_pytorch is installed along with the MPI tool set. Need to source the environment before using it. +for Intel® oneCCL >= 1.12.0 \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..89216e89d5fb200dd2652a18af7709f84e73a03f --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_2.txt @@ -0,0 +1,3 @@ +oneccl_bindings_for_pytorch_path=$(python -c "from oneccl_bindings_for_pytorch import cwd; print(cwd)") +source $oneccl_bindings_for_pytorch_path/env/setvars.sh +for Intel® oneCCL whose version < 1.12.0 \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..48f76ee094b94f08584c43d78d190ffd94b5bb5e --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_3.txt @@ -0,0 +1,76 @@ +torch_ccl_path=$(python -c "import torch; import torch_ccl; import os; print(os.path.abspath(os.path.dirname(torch_ccl.__file__)))") +source $torch_ccl_path/env/setvars.sh +Intel® Extension for PyTorch installation +Intel Extension for PyTorch (IPEX) provides performance optimizations for CPU training with both Float32 and BFloat16 (refer to the single CPU section to learn more). +The following "Usage in Trainer" takes mpirun in Intel® MPI library as an example. +Usage in Trainer +To enable multi CPU distributed training in the Trainer with the ccl backend, users should add --ddp_backend ccl in the command arguments. +Let's see an example with the question-answering example +The following command enables training with 2 processes on one Xeon node, with one process running per one socket. The variables OMP_NUM_THREADS/CCL_WORKER_COUNT can be tuned for optimal performance. +shell script + export CCL_WORKER_COUNT=1 + export MASTER_ADDR=127.0.0.1 + mpirun -n 2 -genv OMP_NUM_THREADS=23 \ + python3 run_qa.py \ + --model_name_or_path google-bert/bert-large-uncased \ + --dataset_name squad \ + --do_train \ + --do_eval \ + --per_device_train_batch_size 12 \ + --learning_rate 3e-5 \ + --num_train_epochs 2 \ + --max_seq_length 384 \ + --doc_stride 128 \ + --output_dir /tmp/debug_squad/ \ + --no_cuda \ + --ddp_backend ccl \ + --use_ipex +The following command enables training with a total of four processes on two Xeons (node0 and node1, taking node0 as the main process), ppn (processes per node) is set to 2, with one process running per one socket. The variables OMP_NUM_THREADS/CCL_WORKER_COUNT can be tuned for optimal performance. +In node0, you need to create a configuration file which contains the IP addresses of each node (for example hostfile) and pass that configuration file path as an argument. 
+shell script + cat hostfile + xxx.xxx.xxx.xxx #node0 ip + xxx.xxx.xxx.xxx #node1 ip +Now, run the following command in node0 and 4DDP will be enabled in node0 and node1 with BF16 auto mixed precision: +shell script + export CCL_WORKER_COUNT=1 + export MASTER_ADDR=xxx.xxx.xxx.xxx #node0 ip + mpirun -f hostfile -n 4 -ppn 2 \ + -genv OMP_NUM_THREADS=23 \ + python3 run_qa.py \ + --model_name_or_path google-bert/bert-large-uncased \ + --dataset_name squad \ + --do_train \ + --do_eval \ + --per_device_train_batch_size 12 \ + --learning_rate 3e-5 \ + --num_train_epochs 2 \ + --max_seq_length 384 \ + --doc_stride 128 \ + --output_dir /tmp/debug_squad/ \ + --no_cuda \ + --ddp_backend ccl \ + --use_ipex \ + --bf16 +Usage with Kubernetes +The same distributed training job from the previous section can be deployed to a Kubernetes cluster using the +Kubeflow PyTorchJob training operator. +Setup +This example assumes that you have: +* Access to a Kubernetes cluster with Kubeflow installed +* kubectl installed and configured to access the Kubernetes cluster +* A Persistent Volume Claim (PVC) that can be used + to store datasets and model files. There are multiple options for setting up the PVC including using an NFS + storage class or a cloud storage bucket. +* A Docker container that includes your model training script and all the dependencies needed to run the script. For + distributed CPU training jobs, this typically includes PyTorch, Transformers, Intel Extension for PyTorch, Intel + oneCCL Bindings for PyTorch, and OpenSSH to communicate between the containers. +The snippet below is an example of a Dockerfile that uses a base image that supports distributed CPU training and then +extracts a Transformers release to the /workspace directory, so that the example scripts are included in the image: +```dockerfile +FROM intel/ai-workflows:torch-2.0.1-huggingface-multinode-py3.9 +WORKDIR /workspace +Download and extract the transformers code +ARG HF_TRANSFORMERS_VER="4.35.2" +RUN mkdir transformers && \ + curl -sSL --retry 5 https://github.com/huggingface/transformers/archive/refs/tags/v${HF_TRANSFORMERS_VER}.tar.gz | tar -C transformers --strip-components=1 -xzf - \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..5e4bca254425e4ed1176a53d5e1785acc61337dd --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_4.txt @@ -0,0 +1,76 @@ +The image needs to be built and copied to the cluster's nodes or pushed to a container registry prior to deploying the +PyTorchJob to the cluster. +PyTorchJob Specification File +The Kubeflow PyTorchJob is used to run the distributed +training job on the cluster. The yaml file for the PyTorchJob defines parameters such as: + * The name of the PyTorchJob + * The number of replicas (workers) + * The python script and it's parameters that will be used to run the training job + * The types of resources (node selector, memory, and CPU) needed for each worker + * The image/tag for the Docker container to use + * Environment variables + * A volume mount for the PVC +The volume mount defines a path where the PVC will be mounted in the container for each worker pod. This location can be +used for the dataset, checkpoint files, and the saved model after training completes. 
+The snippet below is an example of a yaml file for a PyTorchJob with 4 workers running the +question-answering example. +yaml +apiVersion: "kubeflow.org/v1" +kind: PyTorchJob +metadata: + name: transformers-pytorchjob + namespace: kubeflow +spec: + elasticPolicy: + rdzvBackend: c10d + minReplicas: 1 + maxReplicas: 4 + maxRestarts: 10 + pytorchReplicaSpecs: + Worker: + replicas: 4 # The number of worker pods + restartPolicy: OnFailure + template: + spec: + containers: + - name: pytorch + image: : # Specify the docker image to use for the worker pods + imagePullPolicy: IfNotPresent + command: + - torchrun + - /workspace/transformers/examples/pytorch/question-answering/run_qa.py + - --model_name_or_path + - "google-bert/bert-large-uncased" + - --dataset_name + - "squad" + - --do_train + - --do_eval + - --per_device_train_batch_size + - "12" + - --learning_rate + - "3e-5" + - --num_train_epochs + - "2" + - --max_seq_length + - "384" + - --doc_stride + - "128" + - --output_dir + - "/tmp/pvc-mount/output" + - --no_cuda + - --ddp_backend + - "ccl" + - --use_ipex + - --bf16 # Specify --bf16 if your hardware supports bfloat16 + env: + - name: LD_PRELOAD + value: "/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4.5.9:/usr/local/lib/libiomp5.so" + - name: TRANSFORMERS_CACHE + value: "/tmp/pvc-mount/transformers_cache" + - name: HF_DATASETS_CACHE + value: "/tmp/pvc-mount/hf_datasets_cache" + - name: LOGLEVEL + value: "INFO" + - name: CCL_WORKER_COUNT + value: "1" + - name: OMP_NUM_THREADS # Can be tuned for optimal performance \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..5f635b2893ffdd58028f0d20fde0acc271b64b0e --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_5.txt @@ -0,0 +1,23 @@ +resources: + limits: + cpu: 200 # Update the CPU and memory limit values based on your nodes + memory: 128Gi + requests: + cpu: 200 # Update the CPU and memory request values based on your nodes + memory: 128Gi + volumeMounts: + - name: pvc-volume + mountPath: /tmp/pvc-mount + - mountPath: /dev/shm + name: dshm + restartPolicy: Never + nodeSelector: # Optionally use the node selector to specify what types of nodes to use for the workers + node-type: spr + volumes: + - name: pvc-volume + persistentVolumeClaim: + claimName: transformers-pvc + - name: dshm + emptyDir: + medium: Memory +To run this example, update the yaml based on your training script and the nodes in your cluster. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..ed9b6f04952bb2e4757e3483364b10c9ad332281 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_6.txt @@ -0,0 +1,11 @@ +The CPU resource limits/requests in the yaml are defined in cpu units +where 1 CPU unit is equivalent to 1 physical CPU core or 1 virtual core (depending on whether the node is a physical +host or a VM). The amount of CPU and memory limits/requests defined in the yaml should be less than the amount of +available CPU/memory capacity on a single machine. It is usually a good idea to not use the entire machine's capacity in +order to leave some resources for the kubelet and OS. 
In order to get "guaranteed" +quality of service for the worker pods, +set the same CPU and memory amounts for both the resource limits and requests. + +Deploy +After the PyTorchJob spec has been updated with values appropriate for your cluster and training job, it can be deployed +to the cluster using: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_7.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_7.txt new file mode 100644 index 0000000000000000000000000000000000000000..b2ea4fe48e9d09460079c11c3ebc81e8a940e670 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_7.txt @@ -0,0 +1,10 @@ +kubectl create -f pytorchjob.yaml +The kubectl get pods -n kubeflow command can then be used to list the pods in the kubeflow namespace. You should see +the worker pods for the PyTorchJob that was just deployed. At first, they will probably have a status of "Pending" as +the containers get pulled and created, then the status should change to "Running". +NAME READY STATUS RESTARTS AGE + +transformers-pytorchjob-worker-0 1/1 Running 0 7m37s +transformers-pytorchjob-worker-1 1/1 Running 0 7m37s +transformers-pytorchjob-worker-2 1/1 Running 0 7m37s +transformers-pytorchjob-worker-3 1/1 Running 0 7m37s \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_8.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_8.txt new file mode 100644 index 0000000000000000000000000000000000000000..8b20eba7ad3adeffecaa1e2df05ecf89a57fce12 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_cpu_many.txt_chunk_8.txt @@ -0,0 +1,9 @@ +The logs for worker can be viewed using kubectl logs -n kubeflow . Add -f to stream the logs, for example: + +kubectl logs -n kubeflow transformers-pytorchjob-worker-0 -f +After the training job completes, the trained model can be copied from the PVC or storage location. When you are done +with the job, the PyTorchJob resource can be deleted from the cluster using kubectl delete -f pytorchjob.yaml. +Summary +This guide covered running distributed PyTorch training jobs using multiple CPUs on bare metal and on a Kubernetes +cluster. Both cases utilize Intel Extension for PyTorch and Intel oneCCL Bindings for PyTorch for optimal training +performance, and can be used as a template to run your own workload on multiple nodes. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..160e58d1831971c480005f44dbd05f0c80be82f9 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_0.txt @@ -0,0 +1,13 @@ +Efficient Training on Multiple GPUs +If training a model on a single GPU is too slow or if the model's weights do not fit in a single GPU's memory, transitioning +to a multi-GPU setup may be a viable option. Prior to making this transition, thoroughly explore all the strategies covered +in the Methods and tools for efficient training on a single GPU as they are universally applicable +to model training on any number of GPUs. Once you have employed those strategies and found them insufficient for your +case on a single GPU, consider moving to multiple GPUs. 
+Transitioning from a single GPU to multiple GPUs requires the introduction of some form of parallelism, as the workload +must be distributed across the resources. Multiple techniques can be employed to achieve parallelism, such as data +parallelism, tensor parallelism, and pipeline parallelism. It's important to note that there isn't a one-size-fits-all +solution, and the optimal settings depend on the specific hardware configuration you are using. +This guide offers an in-depth overview of individual types of parallelism, as well as guidance on ways to combine +techniques and choosing an appropriate approach. For step-by-step tutorials on distributed training, please refer to +the 🤗 Accelerate documentation. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..4d29dc3178546597f5cac2f87d7b77b42d4c89a9 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_1.txt @@ -0,0 +1,14 @@ +While the main concepts discussed in this guide are likely applicable across frameworks, here we focus on +PyTorch-based implementations. + +Before diving deeper into the specifics of each technique, let's go over the rough decision process when training +large models on a large infrastructure. +Scalability strategy +Begin by estimating how much vRAM is required to train your model. For models hosted on the 🤗 Hub, use our +Model Memory Calculator, which gives you +accurate calculations within a few percent margin. +Parallelization strategy for a single Node / multi-GPU setup +When training a model on a single node with multiple GPUs, your choice of parallelization strategy can significantly +impact performance. Here's a breakdown of your options: +Case 1: Your model fits onto a single GPU +If your model can comfortably fit onto a single GPU, you have two primary options: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_10.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_10.txt new file mode 100644 index 0000000000000000000000000000000000000000..d49dcb04f29f5c492d90250b6745b900da2e600e --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_10.txt @@ -0,0 +1,25 @@ +While it may appear complex, it is a very similar concept to DataParallel (DP). The difference is that instead of +replicating the full model parameters, gradients and optimizer states, each GPU stores only a slice of it. Then, at +run-time when the full layer parameters are needed just for the given layer, all GPUs synchronize to give each other +parts that they miss. +To illustrate this idea, consider a simple model with 3 layers (La, Lb, and Lc), where each layer has 3 parameters. 
+Layer La, for example, has weights a0, a1 and a2: +La | Lb | Lc +---|----|--- +a0 | b0 | c0 +a1 | b1 | c1 +a2 | b2 | c2 +If we have 3 GPUs, ZeRO-DP splits the model onto 3 GPUs like so: + +GPU0: +La | Lb | Lc +---|----|--- +a0 | b0 | c0 +GPU1: +La | Lb | Lc +---|----|--- +a1 | b1 | c1 +GPU2: +La | Lb | Lc +---|----|--- +a2 | b2 | c2 \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_11.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_11.txt new file mode 100644 index 0000000000000000000000000000000000000000..da321ec4d7ae7ddf9f6d1e1ebc3c7574475c176a --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_11.txt @@ -0,0 +1,16 @@ +In a way, this is the same horizontal slicing as tensor parallelism, as opposed to Vertical +slicing, where one puts whole layer-groups on different GPUs. Now let's see how this works: +Each of these GPUs will get the usual mini-batch as it works in DP: +x0 => GPU0 +x1 => GPU1 +x2 => GPU2 +The inputs are passed without modifications as if they would be processed by the original model. +First, the inputs get to the layer La. What happens at this point? +On GPU0: the x0 mini-batch requires the a0, a1, a2 parameters to do its forward path through the layer, but the GPU0 has only a0. +It will get a1 from GPU1 and a2 from GPU2, bringing all the pieces of the model together. +In parallel, GPU1 gets another mini-batch - x1. GPU1 has the a1 parameter, but needs a0 and a2, so it gets those from GPU0 and GPU2. +Same happens to GPU2 that gets the mini-batch x2. It gets a0 and a1 from GPU0 and GPU1. +This way each of the 3 GPUs gets the full tensors reconstructed and makes a forward pass with its own mini-batch. +As soon as the calculation is done, the data that is no longer needed gets dropped - it's only used during the calculation. +The reconstruction is done efficiently via a pre-fetch. +Then the whole process is repeated for layer Lb, then Lc forward-wise, and then backward Lc -> Lb -> La. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_12.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_12.txt new file mode 100644 index 0000000000000000000000000000000000000000..ce486a0fce1e56c31922beece45fed7aa7991cb0 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_12.txt @@ -0,0 +1,11 @@ +This mechanism is similar to an efficient group backpacking strategy: person A carries the tent, person B carries the stove, +and person C carries the axe. Each night they all share what they have with others and get from others what they don't have, +and in the morning they pack up their allocated type of gear and continue on their way. This is what ZeRO DP/Sharded DDP is. +Compare this strategy to the simple one where each person has to carry their own tent, stove and axe (similar to +DataParallel (DP and DDP) in PyTorch), which would be far more inefficient. + +While reading the literature on this topic you may encounter the following synonyms: Sharded, Partitioned. +If you pay close attention the way ZeRO partitions the model's weights - it looks very similar to tensor parallelism +which will be discussed later. This is because it partitions/shards each layer's weights, unlike vertical model parallelism +which is discussed next. 
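As a toy illustration of the shard-and-gather-on-demand idea described above (pure NumPy with hypothetical layer values, not any library's API):

```python
import numpy as np

# Each layer has 3 parameters, matching the La/Lb/Lc example above.
layers = {"La": np.array([0.1, 0.2, 0.3]),   # a0, a1, a2
          "Lb": np.array([1.1, 1.2, 1.3]),   # b0, b1, b2
          "Lc": np.array([2.1, 2.2, 2.3])}   # c0, c1, c2

n_gpus = 3
# Each rank permanently stores only its own slice of every layer.
shards = [{name: w[rank::n_gpus] for name, w in layers.items()}
          for rank in range(n_gpus)]

def all_gather(name):
    """At forward time every rank reconstructs the full layer from all shards."""
    full = np.empty(sum(s[name].size for s in shards))
    for rank in range(n_gpus):
        full[rank::n_gpus] = shards[rank][name]
    return full

# Each rank runs the forward pass for its own mini-batch with the gathered
# layer, then drops the gathered copy and keeps only its shard.
assert np.allclose(all_gather("La"), layers["La"])
```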
+Implementations: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_13.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_13.txt new file mode 100644 index 0000000000000000000000000000000000000000..a15ae06bf0ca2ee943f80aa1da6b280ce23a1412 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_13.txt @@ -0,0 +1,24 @@ +DeepSpeed ZeRO-DP stages 1+2+3 +Accelerate integration +transformers integration + +From Naive Model Parallelism to Pipeline Parallelism +To explain Pipeline parallelism, we'll first look into Naive Model Parallelism (MP), also known as Vertical MP. This approach +involves distributing groups of model layers across multiple GPUs by assigning specific layers to specific GPUs with .to(). +As data flows through these layers, it is moved to the same GPU as the layer, while the other layers remain untouched. +We refer to this Model parallelism as "Vertical" because of how models are typically visualized. For example, the +following diagram shows an 8-layer model split vertically into two slices, placing layers 0-3 onto +GPU0 and 4-7 to GPU1: + +| Layer | | +| 0 | | +| 1 | GPU0 | +| 2 | | +| 3 | | +================ +| Layer | | +| 4 | | +| 5 | GPU1 | +| 6 | | +| 7 | | +================ \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_14.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_14.txt new file mode 100644 index 0000000000000000000000000000000000000000..d73dea609a7bd4b43d0d070667600d4d436a47d9 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_14.txt @@ -0,0 +1,16 @@ +In this example, when data moves from layer 0 to 3, it's no different from regular forward pass. However, passing data +from layer 3 to 4 requires moving it from GPU0 to GPU1, introducing a communication overhead. If the participating +GPUs are on the same compute node (e.g. same physical machine) this copying is fast, but if the GPUs are distributed +across different compute nodes (e.g. multiple machines), the communication overhead could be substantially greater. +Following that, layers 4 to 7 work as they would in the original model. Upon completion of the 7th layer, there is often +a need to send the data back to layer 0 where the labels are (or alternatively send the labels to the last layer). Now the loss can be +computed and the optimizer can do its work. +Naive Model Parallelism comes several shortcomings: +- All but one GPU are idle at any given moment: if 4 GPUs are used, it's nearly identical to quadrupling the amount of memory of a single GPU, and ignoring the rest of the hardware. +- Overhead in data transfer between devices: E.g. 4x 6GB cards will be able to accommodate the same size as 1x 24GB card using naive MP, but a single 24GB card will complete the training faster, because it doesn't have the data copying overhead. But, say, if you have 40GB cards and need to fit a 45GB model you can with 4x 40GB cards (but barely because of the gradient and optimizer states) +- Copying shared embeddings: Shared embeddings may need to get copied back and forth between GPUs. +Now that you are familiar with how the naive approach to model parallelism works and its shortcomings, let's look at Pipeline Parallelism (PP). 
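Before moving on, here is a minimal sketch of the naive vertical split with .to() described above; it assumes a machine with two GPUs and a hypothetical 8-layer stack of Linear layers.

```python
import torch
from torch import nn

# Layers 0-3 live on GPU0, layers 4-7 on GPU1.
devices = ["cuda:0"] * 4 + ["cuda:1"] * 4
layers = nn.ModuleList(nn.Linear(1024, 1024).to(dev) for dev in devices)

def forward(x):
    for layer, dev in zip(layers, devices):
        x = x.to(dev)   # a no-op within a GPU, a device-to-device copy at the boundary
        x = layer(x)
    return x

out = forward(torch.randn(8, 1024))   # output ends up on cuda:1
```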
+PP is almost identical to a naive MP, but it solves the GPU idling problem by chunking the incoming batch into micro-batches +and artificially creating a pipeline, which allows different GPUs to concurrently participate in the computation process. +The following illustration from the GPipe paper +shows the naive MP on the top, and PP on the bottom: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_15.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_15.txt new file mode 100644 index 0000000000000000000000000000000000000000..ed4e9a3d3ebe23290143c93dd204278e5f025be6 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_15.txt @@ -0,0 +1,49 @@ +At the bottom of the diagram, you can observe that the Pipeline Parallelism (PP) approach minimizes the number of idle +GPU zones, referred to as 'bubbles'. Both parts of the diagram show a parallelism level of degree 4, meaning that 4 GPUs +are involved in the pipeline. You can see that there's a forward path of 4 pipe stages (F0, F1, F2 and F3) followed by +a backward path in reverse order (B3, B2, B1, and B0). +PP introduces a new hyperparameter to tune - chunks, which determines how many data chunks are sent in a sequence +through the same pipe stage. For example, in the bottom diagram you can see chunks=4. GPU0 performs the same +forward path on chunk 0, 1, 2 and 3 (F0,0, F0,1, F0,2, F0,3) and then it waits for other GPUs to do complete their work. +Only when the other GPUs begin to complete their work, GPU0 starts to work again doing the backward path for chunks +3, 2, 1 and 0 (B0,3, B0,2, B0,1, B0,0). +Note that this is the same concept as gradient accumulation steps. PyTorch uses chunks, while DeepSpeed refers +to the same hyperparameter as gradient accumulation steps. +Because of the chunks, PP introduces the notion of micro-batches (MBS). DP splits the global data batch size into +mini-batches, so if you have a DP degree of 4, a global batch size of 1024 gets split up into 4 mini-batches of +256 each (1024/4). And if the number of chunks (or GAS) is 32 we end up with a micro-batch size of 8 (256/32). Each +Pipeline stage works with a single micro-batch at a time. To calculate the global batch size of the DP + PP setup, +use the formula: mbs * chunks * dp_degree (8 * 32 * 4 = 1024). +With chunks=1 you end up with the naive MP, which is inefficient. With a large chunks value you end up with +tiny micro-batch sizes which is also inefficient. For this reason, we encourage to experiment with the chunks value to +find the one that leads to the most efficient GPUs utilization. +You may notice a bubble of "dead" time on the diagram that can't be parallelized because the last forward stage +has to wait for backward to complete the pipeline. The purpose of finding the best value for chunks is to enable a high +concurrent GPU utilization across all participating GPUs which translates to minimizing the size of the bubble. +Pipeline API solutions have been implemented in: +- PyTorch +- DeepSpeed +- Megatron-LM +These come with some shortcomings: +- They have to modify the model quite heavily, because Pipeline requires one to rewrite the normal flow of modules into a nn.Sequential sequence of the same, which may require changes to the design of the model. +- Currently the Pipeline API is very restricted. If you had a bunch of Python variables being passed in the very first stage of the Pipeline, you will have to find a way around it. 
Currently, the pipeline interface requires either a single Tensor or a tuple of Tensors as the only input and output. These tensors must have a batch size as the very first dimension, since pipeline is going to chunk the mini batch into micro-batches. Possible improvements are being discussed here https://github.com/pytorch/pytorch/pull/50693 +- Conditional control flow at the level of pipe stages is not possible - e.g., Encoder-Decoder models like T5 require special workarounds to handle a conditional encoder stage. +- They have to arrange each layer so that the output of one layer becomes an input to the other layer. +More recent solutions include: +- Varuna +- Sagemaker +We have not experimented with Varuna and SageMaker but their papers report that they have overcome the list of problems +mentioned above and that they require smaller changes to the user's model. +Implementations: +- PyTorch (initial support in pytorch-1.8, and progressively getting improved in 1.9 and more so in 1.10). Some examples +- DeepSpeed +- Megatron-LM has an internal implementation - no API. +- Varuna +- SageMaker - this is a proprietary solution that can only be used on AWS. +- OSLO - this is implemented based on the Hugging Face Transformers. +🤗 Transformers status: as of this writing none of the models supports full-PP. GPT2 and T5 models have naive MP support. +The main obstacle is being unable to convert the models to nn.Sequential and have all the inputs to be Tensors. This +is because currently the models include many features that make the conversion very complicated, and will need to be removed to accomplish that. +DeepSpeed and Megatron-LM integrations are available in 🤗 Accelerate +Other approaches: +DeepSpeed, Varuna and SageMaker use the concept of an Interleaved Pipeline \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_16.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_16.txt new file mode 100644 index 0000000000000000000000000000000000000000..72e2bd7273857e8dbf072d526ac42e339cd8e119 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_16.txt @@ -0,0 +1,11 @@ +Here the bubble (idle time) is further minimized by prioritizing backward passes. Varuna further attempts to improve the +schedule by using simulations to discover the most efficient scheduling. +OSLO has pipeline parallelism implementation based on the Transformers without nn.Sequential conversion. +Tensor Parallelism +In Tensor Parallelism, each GPU processes a slice of a tensor and only aggregates the full tensor for operations requiring it. +To describe this method, this section of the guide relies on the concepts and diagrams from the Megatron-LM +paper: Efficient Large-Scale Language Model Training on GPU Clusters. +The main building block of any transformer is a fully connected nn.Linear followed by a nonlinear activation GeLU. +The dot dot-product part of it, following the Megatron's paper notation, can be written as Y = GeLU(XA), where X is +an input vector, Y is the output vector, and A is the weight matrix. 
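The following paragraphs describe splitting A column-wise across GPUs; as a single-process NumPy sketch of why that split is exact (using the tanh approximation of GeLU, with made-up shapes):

```python
import numpy as np

def gelu(x):
    return 0.5 * x * (1 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * x**3)))

rng = np.random.default_rng(0)
X = rng.standard_normal((4, 16))      # input activations
A = rng.standard_normal((16, 32))     # weight matrix of the Linear layer

# Column-parallel split across 2 "GPUs": each device holds half of A's columns.
A1, A2 = np.split(A, 2, axis=1)
Y1, Y2 = gelu(X @ A1), gelu(X @ A2)   # computed independently, no sync needed
Y = np.concatenate([Y1, Y2], axis=1)

assert np.allclose(Y, gelu(X @ A))    # identical to the unsplit computation
```

Because GeLU is applied elementwise, each shard of the output can be activated independently, which is what makes the column-wise split synchronization-free until the shards are concatenated.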
+If we look at the computation in matrix form, you can see how the matrix multiplication can be split between multiple GPUs: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_17.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_17.txt new file mode 100644 index 0000000000000000000000000000000000000000..a05e1f39f6d77d8fec5c1370eb4b3f4b793ad449 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_17.txt @@ -0,0 +1,9 @@ +If we split the weight matrix A column-wise across N GPUs and perform matrix multiplications XA_1 through XA_n in parallel, +then we will end up with N output vectors Y_1, Y_2, , Y_n which can be fed into GeLU independently: + +Using this principle, we can update a multi-layer perceptron of arbitrary depth, without the need for any synchronization +between GPUs until the very end, where we need to reconstruct the output vector from shards. The Megatron-LM paper authors +provide a helpful illustration for that: + +Parallelizing the multi-headed attention layers is even simpler, since they are already inherently parallel, due to having +multiple independent heads! \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_18.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_18.txt new file mode 100644 index 0000000000000000000000000000000000000000..5fd295bd6328cc05a0bbbfe87180ab498083c17b --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_18.txt @@ -0,0 +1,21 @@ +Special considerations: TP requires very fast network, and therefore it's not advisable to do TP across more than one node. +Practically, if a node has 4 GPUs, the highest TP degree is therefore 4. If you need a TP degree of 8, you need to use +nodes that have at least 8 GPUs. +This section is based on the original much more detailed TP overview. +by @anton-l. +Alternative names: +- DeepSpeed calls it tensor slicing +Implementations: +- Megatron-LM has an internal implementation, as it's very model-specific +- parallelformers (only inference at the moment) +- SageMaker - this is a proprietary solution that can only be used on AWS. +- OSLO has the tensor parallelism implementation based on the Transformers. +SageMaker combines TP with DP for a more efficient processing. +🤗 Transformers status: +- core: not yet implemented in the core +- but if you want inference parallelformers provides this support for most of our models. So until this is implemented in the core you can use theirs. And hopefully training mode will be supported too. +- Deepspeed-Inference also supports our BERT, GPT-2, and GPT-Neo models in their super-fast CUDA-kernel-based inference mode, see more here +🤗 Accelerate integrates with TP from Megatron-LM. +Data Parallelism + Pipeline Parallelism +The following diagram from the DeepSpeed pipeline tutorial demonstrates +how one can combine DP with PP. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_19.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_19.txt new file mode 100644 index 0000000000000000000000000000000000000000..ae0d9da4c36f85183f4ff0c2aa5d8717416c6ea0 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_19.txt @@ -0,0 +1,13 @@ +Here it's important to see how DP rank 0 doesn't see GPU2 and DP rank 1 doesn't see GPU3. 
To DP there is just GPUs 0 +and 1 where it feeds data as if there were just 2 GPUs. GPU0 "secretly" offloads some of its load to GPU2 using PP. +And GPU1 does the same by enlisting GPU3 to its aid. +Since each dimension requires at least 2 GPUs, here you'd need at least 4 GPUs. +Implementations: +- DeepSpeed +- Megatron-LM +- Varuna +- SageMaker +- OSLO +🤗 Transformers status: not yet implemented +Data Parallelism + Pipeline Parallelism + Tensor Parallelism +To get an even more efficient training a 3D parallelism is used where PP is combined with TP and DP. This can be seen in the following diagram. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..7b963d53da1b830c5fbb5ed01970f4dc201299de --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_2.txt @@ -0,0 +1,15 @@ +DDP - Distributed DataParallel +Zero Redundancy Optimizer (ZeRO) - depending on the situation and configuration used, this method may or may not be faster, however, it's worth experimenting with it. + +Case 2: Your model doesn't fit onto a single GPU: +If your model is too large for a single GPU, you have several alternatives to consider: + +PipelineParallel (PP) +ZeRO +TensorParallel (TP) + +With very fast inter-node connectivity (e.g., NVLINK or NVSwitch) all three strategies (PP, ZeRO, TP) should result in +similar performance. However, without these, PP will be faster than TP or ZeRO. The degree of TP may also +make a difference. It's best to experiment with your specific setup to determine the most suitable strategy. +TP is almost always used within a single node. That is TP size <= GPUs per node. +Case 3: Largest layer of your model does not fit onto a single GPU \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_20.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_20.txt new file mode 100644 index 0000000000000000000000000000000000000000..0eea8bba6581daf1a850db396b4a49cc3f064838 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_20.txt @@ -0,0 +1,27 @@ +This diagram is from a blog post 3D parallelism: Scaling to trillion-parameter models, which is a good read as well. +Since each dimension requires at least 2 GPUs, here you'd need at least 8 GPUs. +Implementations: +- DeepSpeed - DeepSpeed also includes an even more efficient DP, which they call ZeRO-DP. +- Megatron-LM +- Varuna +- SageMaker +- OSLO +🤗 Transformers status: not yet implemented, since we have no PP and TP. +ZeRO Data Parallelism + Pipeline Parallelism + Tensor Parallelism +One of the main features of DeepSpeed is ZeRO, which is a super-scalable extension of DP. It has already been +discussed in ZeRO Data Parallelism. Normally it's a standalone feature that doesn't require PP or TP. +But it can be combined with PP and TP. +When ZeRO-DP is combined with PP (and optionally TP) it typically enables only ZeRO stage 1 (optimizer sharding). +While it's theoretically possible to use ZeRO stage 2 (gradient sharding) with Pipeline Parallelism, it will have negative +performance impacts. There would need to be an additional reduce-scatter collective for every micro-batch to aggregate +the gradients before sharding, which adds a potentially significant communication overhead. 
By nature of Pipeline Parallelism, +small micro-batches are used and instead the focus is on trying to balance arithmetic intensity (micro-batch size) with +minimizing the Pipeline bubble (number of micro-batches). Therefore those communication costs are going to impact the performance. +In addition, there are already fewer layers than normal due to PP and so the memory savings won't be huge. PP already +reduces gradient size by 1/PP, and so gradient sharding savings on top of that are less significant than pure DP. +ZeRO stage 3 is not a good choice either for the same reason - more inter-node communications required. +And since we have ZeRO, the other benefit is ZeRO-Offload. Since this is stage 1 optimizer states can be offloaded to CPU. +Implementations: +- Megatron-DeepSpeed and Megatron-Deepspeed from BigScience, which is the fork of the former repo. +- OSLO +Important papers: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_21.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_21.txt new file mode 100644 index 0000000000000000000000000000000000000000..90c1342eaa1eaba1b5cca685cea974089eb41bb9 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_21.txt @@ -0,0 +1,18 @@ +Using DeepSpeed and Megatron to Train Megatron-Turing NLG 530B, A Large-Scale Generative Language Model + +🤗 Transformers status: not yet implemented, since we have no PP and TP. +FlexFlow +FlexFlow also solves the parallelization problem in a slightly different approach. +Paper: "Beyond Data and Model Parallelism for Deep Neural Networks" by Zhihao Jia, Matei Zaharia, Alex Aiken +It performs a sort of 4D Parallelism over Sample-Operator-Attribute-Parameter. + +Sample = Data Parallelism (sample-wise parallel) +Operator = Parallelize a single operation into several sub-operations +Attribute = Data Parallelism (length-wise parallel) +Parameter = Model Parallelism (regardless of dimension - horizontal or vertical) + +Examples: +* Sample +Let's take 10 batches of sequence length 512. If we parallelize them by sample dimension into 2 devices, we get 10 x 512 which becomes be 5 x 2 x 512. + +Operator \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_22.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_22.txt new file mode 100644 index 0000000000000000000000000000000000000000..e01e026cd0eea6feda3251469e6b8752d7daafa1 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_22.txt @@ -0,0 +1,13 @@ +Operator + +If we perform layer normalization, we compute std first and mean second, and then we can normalize data. +Operator parallelism allows computing std and mean in parallel. So if we parallelize them by operator dimension into 2 +devices (cuda:0, cuda:1), first we copy input data into both devices, and cuda:0 computes std, cuda:1 computes mean at the same time. + +Attribute + +We have 10 batches of 512 length. If we parallelize them by attribute dimension into 2 devices, 10 x 512 will be 10 x 2 x 256. + +Parameter + +It is similar with tensor model parallelism or naive layer-wise model parallelism. 
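The Parameter dimension above is essentially the column- or row-wise weight sharding described in the Tensor Parallelism section earlier. As a minimal, single-process sketch (plain PyTorch, no distributed launch, illustrative shapes only), you can check that splitting a weight matrix A column-wise and concatenating the per-shard GeLU outputs reproduces the unsharded Y = GeLU(XA):

```python
import torch
import torch.nn.functional as F

torch.manual_seed(0)

# Illustrative shapes: 4 token vectors of size 8, hidden size 16
X = torch.randn(4, 8)    # input activations
A = torch.randn(8, 16)   # full weight matrix

# Unsharded reference: Y = GeLU(XA)
Y_ref = F.gelu(X @ A)

# "Tensor parallel" version: split A column-wise across 2 (simulated) GPUs
A_1, A_2 = A.chunk(2, dim=1)   # each shard is 8 x 8
Y_1 = F.gelu(X @ A_1)          # would run on GPU 0
Y_2 = F.gelu(X @ A_2)          # would run on GPU 1, in parallel

# Only at the very end do we reconstruct the output from the shards
Y_tp = torch.cat([Y_1, Y_2], dim=1)

print(torch.allclose(Y_ref, Y_tp, atol=1e-6))  # True
```

Because GeLU is applied element-wise, no communication between the shards is needed until the final concatenation, which is the property the Megatron-LM scheme exploits.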
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_23.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_23.txt new file mode 100644 index 0000000000000000000000000000000000000000..897c2735b6612c1a634f7e12a8fda1dcb5cff63e --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_23.txt @@ -0,0 +1,14 @@ +The significance of this framework is that it takes resources like (1) GPU/TPU/CPU vs. (2) RAM/DRAM vs. (3) +fast-intra-connect/slow-inter-connect and it automatically optimizes all these algorithmically deciding which +parallelisation to use where. +One very important aspect is that FlexFlow is designed for optimizing DNN parallelizations for models with static and +fixed workloads, since models with dynamic behavior may prefer different parallelization strategies across iterations. +So the promise is very attractive - it runs a 30min simulation on the cluster of choice and it comes up with the best +strategy to utilise this specific environment. If you add/remove/replace any parts it'll run and re-optimize the plan +for that. And then you can train. A different setup will have its own custom optimization. +🤗 Transformers status: Transformers models are FX-trace-able via transformers.utils.fx, +which is a prerequisite for FlexFlow, however, changes are required on the FlexFlow side to make it work with Transformers models. +GPU selection +When training on multiple GPUs, you can specify the number of GPUs to use and in what order. This can be useful for instance when you have GPUs with different computing power and want to use the faster GPU first. The selection process works for both DistributedDataParallel and DataParallel to use only a subset of the available GPUs, and you don't need Accelerate or the DeepSpeed integration. +Number of GPUs +For example, if you have 4 GPUs and you only want to use the first 2: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_24.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_24.txt new file mode 100644 index 0000000000000000000000000000000000000000..5afce856a5f4d0d762107bda37a5bb36ce018c85 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_24.txt @@ -0,0 +1,17 @@ +Use the --nproc_per_node to select how many GPUs to use. + +torchrun --nproc_per_node=2 trainer-program.py + +Use --num_processes to select how many GPUs to use. + +accelerate launch --num_processes 2 trainer-program.py + +Use --num_gpus to select how many GPUs to use. + +deepspeed --num_gpus 2 trainer-program.py + +Order of GPUs +Now, to select which GPUs to use and their order, you'll use the CUDA_VISIBLE_DEVICES environment variable. It is easiest to set the environment variable in a ~/bashrc or another startup config file. CUDA_VISIBLE_DEVICES is used to map which GPUs are used. For example, if you have 4 GPUs (0, 1, 2, 3) and you only want to run GPUs 0 and 2: + +CUDA_VISIBLE_DEVICES=0,2 torchrun trainer-program.py +Only the 2 physical GPUs (0 and 2) are "visible" to PyTorch and these are mapped to cuda:0 and cuda:1 respectively. You can also reverse the order of the GPUs to use 2 first. Now, the mapping is cuda:1 for GPU 0 and cuda:0 for GPU 2. 
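To double-check which physical cards end up behind cuda:0 and cuda:1, a small sketch like the following can be run under the same CUDA_VISIBLE_DEVICES setting (device names and counts will of course depend on your machine):

```python
import os
import torch

# Example: expose only physical GPUs 0 and 2; set this before CUDA is initialized,
# e.g. on the command line: CUDA_VISIBLE_DEVICES=0,2 python check_gpus.py
print("CUDA_VISIBLE_DEVICES =", os.environ.get("CUDA_VISIBLE_DEVICES"))

# PyTorch only sees the visible devices, renumbered starting from 0
print("visible device count:", torch.cuda.device_count())
for i in range(torch.cuda.device_count()):
    print(f"cuda:{i} ->", torch.cuda.get_device_name(i))
```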
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_25.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_25.txt new file mode 100644 index 0000000000000000000000000000000000000000..bbf6f3e5ffb3c0139fb82c95df6a629ceec00c54 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_25.txt @@ -0,0 +1,14 @@ +CUDA_VISIBLE_DEVICES=2,0 torchrun trainer-program.py +You can also set the CUDA_VISIBLE_DEVICES environment variable to an empty value to create an environment without GPUs. + +CUDA_VISIBLE_DEVICES= python trainer-program.py + +As with any environment variable, they can be exported instead of being added to the command line. However, this is not recommended because it can be confusing if you forget how the environment variable was setup and you end up using the wrong GPUs. Instead, it is common practice to set the environment variable for a specific training run on the same command line. + +CUDA_DEVICE_ORDER is an alternative environment variable you can use to control how the GPUs are ordered. You can either order them by: + +PCIe bus ID's that matches the order of nvidia-smi and rocm-smi for NVIDIA and AMD GPUs respectively + +export CUDA_DEVICE_ORDER=PCI_BUS_ID + +GPU compute ability \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_26.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_26.txt new file mode 100644 index 0000000000000000000000000000000000000000..91d7164eb8dce02a14a3310a525ac692780a9382 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_26.txt @@ -0,0 +1,6 @@ +export CUDA_DEVICE_ORDER=PCI_BUS_ID + +GPU compute ability + +export CUDA_DEVICE_ORDER=FASTEST_FIRST +The CUDA_DEVICE_ORDER is especially useful if your training setup consists of an older and newer GPU, where the older GPU appears first, but you cannot physically swap the cards to make the newer GPU appear first. In this case, set CUDA_DEVICE_ORDER=FASTEST_FIRST to always use the newer and faster GPU first (nvidia-smi or rocm-smi still reports the GPUs in their PCIe order). Or you could also set export CUDA_VISIBLE_DEVICES=1,0. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..a05a0694c63e6c52b4c39f553fcbabda5ef04b4a --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_3.txt @@ -0,0 +1,13 @@ +If you are not using ZeRO, you have to use TensorParallel (TP), because PipelineParallel (PP) alone won't be sufficient to accommodate the large layer. +If you are using ZeRO, additionally adopt techniques from the Methods and tools for efficient training on a single GPU. + +Parallelization strategy for a multi-Node / multi-GPU setup + +When you have fast inter-node connectivity (e.g., NVLINK or NVSwitch) consider using one of these options: + +ZeRO - as it requires close to no modifications to the model +A combination of PipelineParallel(PP) with TensorParallel(TP) and DataParallel(DP) - this approach will result in fewer communications, but requires significant changes to the model + +When you have slow inter-node connectivity and still low on GPU memory: + +Employ a combination of DataParallel(DP) with PipelineParallel(PP), TensorParallel(TP), and ZeRO. 
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..5f0d83c2cdc024743f50e13a63ceca5d92a43efc --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_4.txt @@ -0,0 +1,12 @@ +Employ a combination of DataParallel(DP) with PipelineParallel(PP), TensorParallel(TP), and ZeRO. + +In the following sections of this guide we dig deeper into how these different parallelism methods work. +Data Parallelism +Even with only 2 GPUs, you can readily leverage the accelerated training capabilities offered by PyTorch's built-in features, +such as DataParallel (DP) and DistributedDataParallel (DDP). Note that +PyTorch documentation recommends to prefer +DistributedDataParallel (DDP) over DataParallel (DP) for multi-GPU training as it works for all models. +Let's take a look at how these two methods work and what makes them different. +DataParallel vs DistributedDataParallel +To understand the key differences in inter-GPU communication overhead between the two methods, let's review the processes per batch: +DDP: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..59993883e19c3992a6dced39b4a8157e7737f5c9 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_5.txt @@ -0,0 +1,4 @@ +At the start time the main process replicates the model once from GPU 0 to the rest of GPUs +Then for each batch: +Each GPU directly consumes its mini-batch of data. +During backward, once the local gradients are ready, they are averaged across all processes. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..ba653cfd520ce152c9e9cc7a06293e799f4eb90e --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_6.txt @@ -0,0 +1,17 @@ +DP: +For each batch: + 1. GPU 0 reads the batch of data and then sends a mini-batch to each GPU. + 2. The up-to-date model is replicated from GPU 0 to each GPU. + 3. forward is executed, and output from each GPU is sent to GPU 0 to compute the loss. + 4. The loss is distributed from GPU 0 to all GPUs, and backward is run. + 5. Gradients from each GPU are sent to GPU 0 and averaged. +Key differences include: +1. DDP performs only a single communication per batch - sending gradients, while DP performs five different data exchanges per batch. +DDP copies data using torch.distributed, while DP copies data within +the process via Python threads (which introduces limitations associated with GIL). As a result, DistributedDataParallel (DDP) is generally faster than DataParallel (DP) unless you have slow GPU card inter-connectivity. +2. Under DP, GPU 0 performs significantly more work than other GPUs, resulting in GPU under-utilization. +3. DDP supports distributed training across multiple machines, whereas DP does not. +This is not an exhaustive list of differences between DP and DDP, however, other nuances are out of scope of this guide. +You can get a deeper understanding of these methods by reading this article. 
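To make the DDP flow above concrete, here is a minimal training-step sketch (a toy model and random data, meant to be launched with torchrun --nproc_per_node=2; it is not the benchmark script used below):

```python
import os
import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP

def main():
    # torchrun sets RANK, LOCAL_RANK and WORLD_SIZE for each process
    dist.init_process_group(backend="nccl")
    local_rank = int(os.environ["LOCAL_RANK"])
    torch.cuda.set_device(local_rank)

    model = torch.nn.Linear(128, 2).cuda(local_rank)
    model = DDP(model, device_ids=[local_rank])  # gradient sync happens during backward()
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)

    for step in range(10):
        # Each rank consumes its own mini-batch of data
        x = torch.randn(32, 128, device=local_rank)
        y = torch.randint(0, 2, (32,), device=local_rank)
        loss = torch.nn.functional.cross_entropy(model(x), y)
        loss.backward()      # local gradients are averaged across all ranks here
        optimizer.step()
        optimizer.zero_grad()

    dist.destroy_process_group()

if __name__ == "__main__":
    main()
```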
+Let's illustrate the differences between DP and DDP with an experiment. We'll benchmark the differences between DP and +DDP with an added context of NVLink presence: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_7.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_7.txt new file mode 100644 index 0000000000000000000000000000000000000000..fe523b1ef567f0934ae68a78cae028a8ad5e5a7e --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_7.txt @@ -0,0 +1,12 @@ +Hardware: 2x TITAN RTX 24GB each + NVlink with 2 NVLinks (NV2 in nvidia-smi topo -m). +Software: pytorch-1.8-to-be + cuda-11.0 / transformers==4.3.0.dev0. + +To disable the NVLink feature on one of the benchmarks, we use NCCL_P2P_DISABLE=1. +Here is the benchmarking code and outputs: +DP +```bash +rm -r /tmp/test-clm; CUDA_VISIBLE_DEVICES=0,1 \ +python examples/pytorch/language-modeling/run_clm.py \ +--model_name_or_path openai-community/gpt2 --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 \ +--do_train --output_dir /tmp/test-clm --per_device_train_batch_size 4 --max_steps 200 +{'train_runtime': 110.5948, 'train_samples_per_second': 1.808, 'epoch': 0.69} \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_8.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_8.txt new file mode 100644 index 0000000000000000000000000000000000000000..0f473e04d7ba7bd7fc0b9cceb17d7627485df90f --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_8.txt @@ -0,0 +1,15 @@ +DDP w/ NVlink +```bash +rm -r /tmp/test-clm; CUDA_VISIBLE_DEVICES=0,1 \ +torchrun --nproc_per_node 2 examples/pytorch/language-modeling/run_clm.py \ +--model_name_or_path openai-community/gpt2 --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 \ +--do_train --output_dir /tmp/test-clm --per_device_train_batch_size 4 --max_steps 200 +{'train_runtime': 101.9003, 'train_samples_per_second': 1.963, 'epoch': 0.69} + +DDP w/o NVlink +```bash +rm -r /tmp/test-clm; NCCL_P2P_DISABLE=1 CUDA_VISIBLE_DEVICES=0,1 \ +torchrun --nproc_per_node 2 examples/pytorch/language-modeling/run_clm.py \ +--model_name_or_path openai-community/gpt2 --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 \ +--do_train --output_dir /tmp/test-clm --per_device_train_batch_size 4 --max_steps 200 +{'train_runtime': 131.4367, 'train_samples_per_second': 1.522, 'epoch': 0.69} \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_9.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_9.txt new file mode 100644 index 0000000000000000000000000000000000000000..c7cb3465c0b6a2972a19b624116e459fd0ced594 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_many.txt_chunk_9.txt @@ -0,0 +1,11 @@ +Here are the same benchmarking results gathered in a table for convenience: +| Type | NVlink | Time | +| :----- | ----- | ---: | +| 2:DP | Y | 110s | +| 2:DDP | Y | 101s | +| 2:DDP | N | 131s | +As you can see, in this case DP is ~10% slower than DDP with NVlink, but ~15% faster than DDP without NVlink. +The real difference will depend on how much data each GPU needs to sync with the others - the more there is to sync, +the more a slow link will impede the overall runtime. +ZeRO Data Parallelism +ZeRO-powered data parallelism (ZeRO-DP) is illustrated in the following diagram from this blog post. 
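While the details are covered in the dedicated ZeRO/DeepSpeed sections, a minimal sketch of enabling ZeRO-DP stage 2 through the [Trainer] integration might look roughly like the following. The configuration values are illustrative, and "auto" is intended to let the integration derive them from the TrainingArguments; check the DeepSpeed integration docs for the exact supported keys.

```python
from transformers import TrainingArguments

# Illustrative ZeRO stage 2 config passed directly as a dict
ds_config = {
    "zero_optimization": {
        "stage": 2,
        "overlap_comm": True,
        "contiguous_gradients": True,
    },
    "fp16": {"enabled": "auto"},
    "train_micro_batch_size_per_gpu": "auto",
    "gradient_accumulation_steps": "auto",
}

training_args = TrainingArguments(
    output_dir="out",
    per_device_train_batch_size=4,
    fp16=True,
    deepspeed=ds_config,  # a dict or a path to a JSON config file
)
```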
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..17e2cee01ab9b8baea794d3b14d5763a3fb60f09 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_0.txt @@ -0,0 +1,12 @@ +Methods and tools for efficient training on a single GPU +This guide demonstrates practical techniques that you can use to increase the efficiency of your model's training by +optimizing memory utilization, speeding up the training, or both. If you'd like to understand how GPU is utilized during +training, please refer to the Model training anatomy conceptual guide first. This guide +focuses on practical techniques. + +If you have access to a machine with multiple GPUs, these approaches are still valid, plus you can leverage additional methods outlined in the multi-GPU section. + +When training large models, there are two aspects that should be considered at the same time: + +Data throughput/training time +Model performance \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..4aa7a0bc856139e35291af0efa67c99801561a8f --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_1.txt @@ -0,0 +1,18 @@ +Maximizing the throughput (samples/second) leads to lower training cost. This is generally achieved by utilizing the GPU +as much as possible and thus filling GPU memory to its limit. If the desired batch size exceeds the limits of the GPU memory, +the memory optimization techniques, such as gradient accumulation, can help. +However, if the preferred batch size fits into memory, there's no reason to apply memory-optimizing techniques because they can +slow down the training. Just because one can use a large batch size, does not necessarily mean they should. As part of +hyperparameter tuning, you should determine which batch size yields the best results and then optimize resources accordingly. +The methods and tools covered in this guide can be classified based on the effect they have on the training process: +| Method/tool | Improves training speed | Optimizes memory utilization | +|:-----------------------------------------------------------|:------------------------|:-----------------------------| +| Batch size choice | Yes | Yes | +| Gradient accumulation | No | Yes | +| Gradient checkpointing | No | Yes | +| Mixed precision training | Yes | (No) | +| Optimizer choice | Yes | Yes | +| Data preloading | Yes | No | +| DeepSpeed Zero | No | Yes | +| torch.compile | Yes | No | +| Parameter-Efficient Fine Tuning (PEFT) | No | Yes | \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_10.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_10.txt new file mode 100644 index 0000000000000000000000000000000000000000..e60c9596aac6892d7caa9df36588825d88d21aa9 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_10.txt @@ -0,0 +1,2 @@ +DataLoader(pin_memory=True, ) - ensures the data gets preloaded into the pinned memory on CPU and typically leads to much faster transfers from CPU to GPU memory. +DataLoader(num_workers=4, ) - spawn several workers to preload data faster. 
During training, watch the GPU utilization stats; if it's far from 100%, experiment with increasing the number of workers. Of course, the problem could be elsewhere, so many workers won't necessarily lead to better performance. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_11.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_11.txt new file mode 100644 index 0000000000000000000000000000000000000000..bdfab93d17c76f2c5f2d9704398932a0847fd343 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_11.txt @@ -0,0 +1,10 @@ +When using [Trainer], the corresponding [TrainingArguments] are: dataloader_pin_memory (True by default), and dataloader_num_workers (defaults to 0). +DeepSpeed ZeRO +DeepSpeed is an open-source deep learning optimization library that is integrated with 🤗 Transformers and 🤗 Accelerate. +It provides a wide range of features and optimizations designed to improve the efficiency and scalability of large-scale +deep learning training. +If your model fits onto a single GPU and you have enough space to fit a small batch size, you don't need to use DeepSpeed +as it'll only slow things down. However, if the model doesn't fit onto a single GPU or you can't fit a small batch, you can +leverage DeepSpeed ZeRO + CPU Offload, or NVMe Offload for much larger models. In this case, you need to separately +install the library, then follow one of the guides to create a configuration file +and launch DeepSpeed: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_12.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_12.txt new file mode 100644 index 0000000000000000000000000000000000000000..c02217ea2818416f86c6cdfa54e3792bb7ecfa30 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_12.txt @@ -0,0 +1,3 @@ +For an in-depth guide on DeepSpeed integration with [Trainer], review the corresponding documentation, specifically the +section for a single GPU. Some adjustments are required to use DeepSpeed in a notebook; please take a look at the corresponding guide. +If you prefer to use 🤗 Accelerate, refer to 🤗 Accelerate DeepSpeed guide. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_13.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_13.txt new file mode 100644 index 0000000000000000000000000000000000000000..a56cec5e66dfb2f33e84c9a86f48510dfa6f55eb --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_13.txt @@ -0,0 +1,49 @@ +Using torch.compile +PyTorch 2.0 introduced a new compile function that doesn't require any modification to existing PyTorch code but can +optimize your code by adding a single line of code: model = torch.compile(model). +If using [Trainer], you only need to pass the torch_compile option in the [TrainingArguments]: +python +training_args = TrainingArguments(torch_compile=True, **default_args) +torch.compile uses Python's frame evaluation API to automatically create a graph from existing PyTorch programs. After +capturing the graph, different backends can be deployed to lower the graph to an optimized engine. +You can find more details and benchmarks in PyTorch documentation. +torch.compile has a growing list of backends, which can be found in by calling torchdynamo.list_backends(), each of which with its optional dependencies. 
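Outside of the [Trainer], the same one-line usage applies to any module. A tiny sketch (toy model, default backend):

```python
import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(128, 256), nn.GELU(), nn.Linear(256, 10))
compiled_model = torch.compile(model)  # no other code changes required

x = torch.randn(8, 128)
out = compiled_model(x)  # the first call triggers graph capture and compilation
```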
+Choose which backend to use by specifying it via torch_compile_backend in the [TrainingArguments]. Some of the most commonly used backends are: +Debugging backends: +* dynamo.optimize("eager") - Uses PyTorch to run the extracted GraphModule. This is quite useful in debugging TorchDynamo issues. +* dynamo.optimize("aot_eager") - Uses AotAutograd with no compiler, i.e, just using PyTorch eager for the AotAutograd's extracted forward and backward graphs. This is useful for debugging, and unlikely to give speedups. +Training & inference backends: +* dynamo.optimize("inductor") - Uses TorchInductor backend with AotAutograd and cudagraphs by leveraging codegened Triton kernels Read more +* dynamo.optimize("nvfuser") - nvFuser with TorchScript. Read more +* dynamo.optimize("aot_nvfuser") - nvFuser with AotAutograd. Read more +* dynamo.optimize("aot_cudagraphs") - cudagraphs with AotAutograd. Read more +Inference-only backends: +* dynamo.optimize("ofi") - Uses Torchscript optimize_for_inference. Read more +* dynamo.optimize("fx2trt") - Uses NVIDIA TensorRT for inference optimizations. Read more +* dynamo.optimize("onnxrt") - Uses ONNXRT for inference on CPU/GPU. Read more +* dynamo.optimize("ipex") - Uses IPEX for inference on CPU. Read more +For an example of using torch.compile with 🤗 Transformers, check out this blog post on fine-tuning a BERT model for Text Classification using the newest PyTorch 2.0 features +Using 🤗 PEFT +Parameter-Efficient Fine Tuning (PEFT) methods freeze the pretrained model parameters during fine-tuning and add a small number of trainable parameters (the adapters) on top of it. +As a result the memory associated to the optimizer states and gradients are greatly reduced. +For example with a vanilla AdamW, the memory requirement for the optimizer state would be: +* fp32 copy of parameters: 4 bytes/param +* Momentum: 4 bytes/param +* Variance: 4 bytes/param +Suppose a model with 7B parameters and 200 millions parameters injected with Low Rank Adapters. +The memory requirement for the optimizer state of the plain model would be 12 * 7 = 84 GB (assuming 7B trainable parameters). +Adding Lora increases slightly the memory associated to the model weights and substantially decreases memory requirement for the optimizer state to 12 * 0.2 = 2.4GB. +Read more about PEFT and its detailed usage in the PEFT documentation or PEFT repository. +Using 🤗 Accelerate +With 🤗 Accelerate you can use the above methods while gaining full +control over the training loop and can essentially write the loop in pure PyTorch with some minor modifications. 
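Before moving on to the Accelerate loop, here is a minimal sketch of the PEFT/LoRA setup described above. The base model and hyperparameters are illustrative, and this assumes the peft library is installed:

```python
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

# Illustrative base model; any causal LM works similarly
model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")

lora_config = LoraConfig(
    r=8,              # rank of the low-rank update matrices
    lora_alpha=16,
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)

# Only the adapter weights are trainable, so optimizer state shrinks accordingly
model.print_trainable_parameters()
```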
+Suppose you have combined the methods in the [TrainingArguments] like so: +py +training_args = TrainingArguments( + per_device_train_batch_size=1, + gradient_accumulation_steps=4, + gradient_checkpointing=True, + fp16=True, + **default_args, +) +The full example training loop with 🤗 Accelerate is only a handful of lines of code long: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_14.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_14.txt new file mode 100644 index 0000000000000000000000000000000000000000..99453f15d14b4fcd96f2b26374064282f7dd537b --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_14.txt @@ -0,0 +1,15 @@ +from accelerate import Accelerator +from torch.utils.data.dataloader import DataLoader +dataloader = DataLoader(ds, batch_size=training_args.per_device_train_batch_size) +if training_args.gradient_checkpointing: + model.gradient_checkpointing_enable() +accelerator = Accelerator(fp16=training_args.fp16) +model, optimizer, dataloader = accelerator.prepare(model, adam_bnb_optim, dataloader) +model.train() +for step, batch in enumerate(dataloader, start=1): + loss = model(**batch).loss + loss = loss / training_args.gradient_accumulation_steps + accelerator.backward(loss) + if step % training_args.gradient_accumulation_steps == 0: + optimizer.step() + optimizer.zero_grad() \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_15.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_15.txt new file mode 100644 index 0000000000000000000000000000000000000000..4a425ed6889fb428081f84643bcbdb4e77df41ce --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_15.txt @@ -0,0 +1,32 @@ +First we wrap the dataset in a DataLoader. +Then we can enable gradient checkpointing by calling the model's [~PreTrainedModel.gradient_checkpointing_enable] method. +When we initialize the Accelerator +we can specify if we want to use mixed precision training and it will take care of it for us in the [prepare] call. +During the prepare +call the dataloader will also be distributed across workers should we use multiple GPUs. We use the same 8-bit optimizer from the earlier example. +Finally, we can add the main training loop. Note that the backward call is handled by 🤗 Accelerate. We can also see +how gradient accumulation works: we normalize the loss, so we get the average at the end of accumulation and once we have +enough steps we run the optimization. +Implementing these optimization techniques with 🤗 Accelerate only takes a handful of lines of code and comes with the +benefit of more flexibility in the training loop. For a full documentation of all features have a look at the +Accelerate documentation. +Efficient Software Prebuilds +PyTorch's pip and conda builds come prebuilt with the cuda toolkit +which is enough to run PyTorch, but it is insufficient if you need to build cuda extensions. +At times, additional efforts may be required to pre-build some components. For instance, if you're using libraries like apex that +don't come pre-compiled. In other situations figuring out how to install the right cuda toolkit system-wide can be complicated. +To address these scenarios PyTorch and NVIDIA released a new version of NGC docker container which already comes with +everything prebuilt. You just need to install your programs on it, and it will run out of the box. 
+This approach is also useful if you want to tweak the pytorch source and/or make a new customized build. +To find the docker image version you want start with PyTorch release notes, +choose one of the latest monthly releases. Go into the release's notes for the desired release, check that the environment's +components are matching your needs (including NVIDIA Driver requirements!) and then at the very top of that document go +to the corresponding NGC page. If for some reason you get lost, here is the index of all PyTorch NGC images. +Next follow the instructions to download and deploy the docker image. +Mixture of Experts +Some recent papers reported a 4-5x training speedup and a faster inference by integrating +Mixture of Experts (MoE) into the Transformer models. +Since it has been discovered that more parameters lead to better performance, this technique allows to increase the +number of parameters by an order of magnitude without increasing training costs. +In this approach every other FFN layer is replaced with a MoE Layer which consists of many experts, with a gated function +that trains each expert in a balanced way depending on the input token's position in a sequence. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_16.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_16.txt new file mode 100644 index 0000000000000000000000000000000000000000..f508fc17d2a8288fc0c8e808c57a315be3cacb0a --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_16.txt @@ -0,0 +1,12 @@ +(source: GLAM) +You can find exhaustive details and comparison tables in the papers listed at the end of this section. +The main drawback of this approach is that it requires staggering amounts of GPU memory - almost an order of magnitude +larger than its dense equivalent. Various distillation and approaches are proposed to how to overcome the much higher memory requirements. +There is direct trade-off though, you can use just a few experts with a 2-3x smaller base model instead of dozens or +hundreds experts leading to a 5x smaller model and thus increase the training speed moderately while increasing the +memory requirements moderately as well. +Most related papers and implementations are built around Tensorflow/TPUs: + +GShard: Scaling Giant Models with Conditional Computation and Automatic Sharding +Switch Transformers: Scaling to Trillion Parameter Models with Simple and Efficient Sparsity +GLaM: Generalist Language Model (GLaM) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_17.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_17.txt new file mode 100644 index 0000000000000000000000000000000000000000..d18ecf7075dd816c34b5e4d22ffef026ebe79090 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_17.txt @@ -0,0 +1,4 @@ +And for Pytorch DeepSpeed has built one as well: DeepSpeed-MoE: Advancing Mixture-of-Experts Inference and Training to Power Next-Generation AI Scale, Mixture of Experts - blog posts: 1, 2 and specific deployment with large transformer-based natural language generation models: blog post, Megatron-Deepspeed branch. +Using PyTorch native attention and Flash Attention +PyTorch's torch.nn.functional.scaled_dot_product_attention (SDPA) can also call FlashAttention and memory-efficient attention kernels under the hood. 
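A minimal sketch of the call itself (random tensors, illustrative shapes; this assumes a CUDA device, and which kernel is picked depends on your hardware, dtype and masking):

```python
import torch
import torch.nn.functional as F

# (batch, num_heads, seq_len, head_dim)
q = torch.randn(2, 8, 128, 64, device="cuda", dtype=torch.float16)
k = torch.randn(2, 8, 128, 64, device="cuda", dtype=torch.float16)
v = torch.randn(2, 8, 128, 64, device="cuda", dtype=torch.float16)

# PyTorch dispatches to FlashAttention, memory-efficient or math kernels
# automatically based on the inputs and the available hardware
out = F.scaled_dot_product_attention(q, k, v, is_causal=True)
print(out.shape)  # torch.Size([2, 8, 128, 64])
```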
SDPA support is currently being added natively in Transformers and is used by default for torch>=2.1.1 when an implementation is available. Please refer to PyTorch scaled dot product attention for a list of supported models and more details. +Check out this blogpost to learn more about acceleration and memory-savings with SDPA. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..5d5b559f7b9940724fcec35349d4b4a8183bcad8 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_2.txt @@ -0,0 +1,2 @@ +Note: when using mixed precision with a small model and a large batch size, there will be some memory savings but with a +large model and a small batch size, the memory use will be larger. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..1e5fcf1e748d91043e9582c993a69f845fccbe3f --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_3.txt @@ -0,0 +1,59 @@ +You can combine the above methods to get a cumulative effect. These techniques are available to you whether you are +training your model with [Trainer] or writing a pure PyTorch loop, in which case you can configure these optimizations +with 🤗 Accelerate. +If these methods do not result in sufficient gains, you can explore the following options: +* Look into building your own custom Docker container with efficient software prebuilds +* Consider a model that uses Mixture of Experts (MoE) +* Convert your model to BetterTransformer to leverage PyTorch native attention +Finally, if all of the above is still not enough, even after switching to a server-grade GPU like A100, consider moving +to a multi-GPU setup. All these approaches are still valid in a multi-GPU setup, plus you can leverage additional parallelism +techniques outlined in the multi-GPU section. +Batch size choice +To achieve optimal performance, start by identifying the appropriate batch size. It is recommended to use batch sizes and +input/output neuron counts that are of size 2^N. Often it's a multiple of 8, but it can be +higher depending on the hardware being used and the model's dtype. +For reference, check out NVIDIA's recommendation for input/output neuron counts and +batch size for +fully connected layers (which are involved in GEMMs (General Matrix Multiplications)). +Tensor Core Requirements +define the multiplier based on the dtype and the hardware. For instance, for fp16 data type a multiple of 8 is recommended, unless +it's an A100 GPU, in which case use multiples of 64. +For parameters that are small, consider also Dimension Quantization Effects. +This is where tiling happens and the right multiplier can have a significant speedup. +Gradient Accumulation +The gradient accumulation method aims to calculate gradients in smaller increments instead of computing them for the +entire batch at once. This approach involves iteratively calculating gradients in smaller batches by performing forward +and backward passes through the model and accumulating the gradients during the process. Once a sufficient number of +gradients have been accumulated, the model's optimization step is executed. 
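In plain PyTorch, the accumulation pattern just described looks roughly like this (toy model and random data):

```python
import torch

model = torch.nn.Linear(128, 2)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
accumulation_steps = 4  # micro-batches per optimizer step

for step in range(1, 17):
    x, y = torch.randn(8, 128), torch.randint(0, 2, (8,))
    loss = torch.nn.functional.cross_entropy(model(x), y)
    (loss / accumulation_steps).backward()  # scale so the accumulated gradient is an average
    if step % accumulation_steps == 0:
        optimizer.step()                    # effective batch size: 8 * 4 = 32
        optimizer.zero_grad()
```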
By employing gradient accumulation, it +becomes possible to increase the effective batch size beyond the limitations imposed by the GPU's memory capacity. +However, it is important to note that the additional forward and backward passes introduced by gradient accumulation can +slow down the training process. +You can enable gradient accumulation by adding the gradient_accumulation_steps argument to [TrainingArguments]: +py +training_args = TrainingArguments(per_device_train_batch_size=1, gradient_accumulation_steps=4, **default_args) +In the above example, your effective batch size becomes 4. +Alternatively, use 🤗 Accelerate to gain full control over the training loop. Find the 🤗 Accelerate example +further down in this guide. +While it is advised to max out GPU usage as much as possible, a high number of gradient accumulation steps can +result in a more pronounced training slowdown. Consider the following example. Let's say, the per_device_train_batch_size=4 +without gradient accumulation hits the GPU's limit. If you would like to train with batches of size 64, do not set the +per_device_train_batch_size to 1 and gradient_accumulation_steps to 64. Instead, keep per_device_train_batch_size=4 +and set gradient_accumulation_steps=16. This results in the same effective batch size while making better use of +the available GPU resources. +For additional information, please refer to batch size and gradient accumulation benchmarks for RTX-3090 +and A100. +Gradient Checkpointing +Some large models may still face memory issues even when the batch size is set to 1 and gradient accumulation is used. +This is because there are other components that also require memory storage. +Saving all activations from the forward pass in order to compute the gradients during the backward pass can result in +significant memory overhead. The alternative approach of discarding the activations and recalculating them when needed +during the backward pass, would introduce a considerable computational overhead and slow down the training process. +Gradient checkpointing offers a compromise between these two approaches and saves strategically selected activations +throughout the computational graph so only a fraction of the activations need to be re-computed for the gradients. For +an in-depth explanation of gradient checkpointing, refer to this great article. +To enable gradient checkpointing in the [Trainer], pass the corresponding a flag to [TrainingArguments]: +py +training_args = TrainingArguments( + per_device_train_batch_size=1, gradient_accumulation_steps=4, gradient_checkpointing=True, **default_args +) +Alternatively, use 🤗 Accelerate - find the 🤗 Accelerate example further in this guide. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..a78c2350bdcb205c81a55e83da9f656c64d4823d --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_4.txt @@ -0,0 +1 @@ +While gradient checkpointing may improve memory efficiency, it slows training by approximately 20%. 
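Outside of the [Trainer] flag, the same idea can be applied to a plain PyTorch module via torch.utils.checkpoint. A minimal sketch (toy blocks; use_reentrant=False selects the non-reentrant variant recommended in recent PyTorch releases):

```python
import torch
import torch.nn as nn
from torch.utils.checkpoint import checkpoint

blocks = nn.ModuleList([nn.Sequential(nn.Linear(256, 256), nn.GELU()) for _ in range(4)])

def forward(x):
    # Activations inside each checkpointed block are not stored; they are
    # recomputed during the backward pass, trading compute for memory
    for block in blocks:
        x = checkpoint(block, x, use_reentrant=False)
    return x

x = torch.randn(32, 256, requires_grad=True)
out = forward(x)
out.sum().backward()
```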
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..44dc747b90bb6b6ffbcebf50ea57db7f8e828b7b --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_5.txt @@ -0,0 +1,45 @@ +Mixed precision training +Mixed precision training is a technique that aims to optimize the computational efficiency of training models by +utilizing lower-precision numerical formats for certain variables. Traditionally, most models use 32-bit floating point +precision (fp32 or float32) to represent and process variables. However, not all variables require this high precision +level to achieve accurate results. By reducing the precision of certain variables to lower numerical formats like 16-bit +floating point (fp16 or float16), we can speed up the computations. Because in this approach some computations are performed +in half-precision, while some are still in full precision, the approach is called mixed precision training. +Most commonly mixed precision training is achieved by using fp16 (float16) data types, however, some GPU architectures +(such as the Ampere architecture) offer bf16 and tf32 (CUDA internal data type) data types. Check +out the NVIDIA Blog to learn more about +the differences between these data types. +fp16 +The main advantage of mixed precision training comes from saving the activations in half precision (fp16). +Although the gradients are also computed in half precision they are converted back to full precision for the optimization +step so no memory is saved here. +While mixed precision training results in faster computations, it can also lead to more GPU memory being utilized, especially for small batch sizes. +This is because the model is now present on the GPU in both 16-bit and 32-bit precision (1.5x the original model on the GPU). +To enable mixed precision training, set the fp16 flag to True: +py +training_args = TrainingArguments(per_device_train_batch_size=4, fp16=True, **default_args) +If you prefer to use 🤗 Accelerate, find the 🤗 Accelerate example further in this guide. +BF16 +If you have access to an Ampere or newer hardware you can use bf16 for mixed precision training and evaluation. While +bf16 has a worse precision than fp16, it has a much bigger dynamic range. In fp16 the biggest number you can have +is 65535 and any number above that will result in an overflow. A bf16 number can be as large as 3.39e+38 (!) which +is about the same as fp32 - because both have 8-bits used for the numerical range. +You can enable BF16 in the 🤗 Trainer with: +python +training_args = TrainingArguments(bf16=True, **default_args) +TF32 +The Ampere hardware uses a magical data type called tf32. It has the same numerical range as fp32 (8-bits), but instead +of 23 bits precision it has only 10 bits (same as fp16) and uses only 19 bits in total. It's "magical" in the sense that +you can use the normal fp32 training and/or inference code and by enabling tf32 support you can get up to 3x throughput +improvement. All you need to do is to add the following to your code: +python +import torch +torch.backends.cuda.matmul.allow_tf32 = True +torch.backends.cudnn.allow_tf32 = True +CUDA will automatically switch to using tf32 instead of fp32 where possible, assuming that the used GPU is from the Ampere series. 
+According to NVIDIA research, the +majority of machine learning training workloads show the same perplexity and convergence with tf32 training as with fp32. +If you're already using fp16 or bf16 mixed precision it may help with the throughput as well. +You can enable this mode in the 🤗 Trainer: +python +TrainingArguments(tf32=True, **default_args) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..6d499106c57b0a0eee1de6d4b92d17563de8ee86 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_6.txt @@ -0,0 +1 @@ +tf32 can't be accessed directly via tensor.to(dtype=torch.tf32) because it is an internal CUDA data type. You need torch>=1.7 to use tf32 data types. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_7.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_7.txt new file mode 100644 index 0000000000000000000000000000000000000000..ea7e20d25d23e99d2c66385108980ec5ee3d6968 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_7.txt @@ -0,0 +1,43 @@ +For additional information on tf32 vs other precisions, please refer to the following benchmarks: +RTX-3090 and +A100. +Flash Attention 2 +You can speed up the training throughput by using the Flash Attention 2 integration in transformers. Check out the appropriate section in the single GPU section to learn more about how to load a model with Flash Attention 2 modules. +Optimizer choice +The most common optimizer used to train transformer models is Adam or AdamW (Adam with weight decay). Adam achieves +good convergence by storing the rolling average of the previous gradients; however, it adds an additional memory +footprint of the order of the number of model parameters. To remedy this, you can use an alternative optimizer. +For example, if you have NVIDIA/apex installed for NVIDIA GPUs, or ROCmSoftwarePlatform/apex for AMD GPUs, adamw_apex_fused will give you the +fastest training experience among all supported AdamW optimizers. +[Trainer] integrates a variety of optimizers that can be used out of the box: adamw_hf, adamw_torch, adamw_torch_fused, +adamw_apex_fused, adamw_anyprecision, adafactor, or adamw_bnb_8bit. More optimizers can be plugged in via a third-party implementation. +Let's take a closer look at two alternatives to the AdamW optimizer: +1. adafactor, which is available in [Trainer] +2. adamw_bnb_8bit is also available in Trainer, but a third-party integration is provided below for demonstration. +For comparison, for a 3B-parameter model, like "google-t5/t5-3b": +* A standard AdamW optimizer will need 24GB of GPU memory because it uses 8 bytes for each parameter (8*3 => 24GB) +* Adafactor optimizer will need more than 12GB. It uses slightly more than 4 bytes for each parameter, so 4*3 and then some extra. +* 8bit BNB quantized optimizer will use only (2*3) 6GB if all optimizer states are quantized. +Adafactor +Adafactor doesn't store rolling averages for each element in weight matrices. Instead, it keeps aggregated information +(sums of rolling averages row- and column-wise), significantly reducing its footprint. However, compared to Adam, +Adafactor may have slower convergence in certain cases.
+You can switch to Adafactor by setting optim="adafactor" in [TrainingArguments]: +py +training_args = TrainingArguments(per_device_train_batch_size=4, optim="adafactor", **default_args) +Combined with other approaches (gradient accumulation, gradient checkpointing, and mixed precision training) +you can notice up to 3x improvement while maintaining the throughput! However, as mentioned before, the convergence of +Adafactor can be worse than Adam. +8-bit Adam +Instead of aggregating optimizer states like Adafactor, 8-bit Adam keeps the full state and quantizes it. Quantization +means that it stores the state with lower precision and dequantizes it only for the optimization. This is similar to the +idea behind mixed precision training. +To use adamw_bnb_8bit, you simply need to set optim="adamw_bnb_8bit" in [TrainingArguments]: +py +training_args = TrainingArguments(per_device_train_batch_size=4, optim="adamw_bnb_8bit", **default_args) +However, we can also use a third-party implementation of the 8-bit optimizer for demonstration purposes to see how that can be integrated. +First, follow the installation guide in the GitHub repo to install the bitsandbytes library +that implements the 8-bit Adam optimizer. +Next you need to initialize the optimizer. This involves two steps: +* First, group the model's parameters into two groups - one where weight decay should be applied, and the other one where it should not. Usually, biases and layer norm parameters are not weight decayed. +* Then do some argument housekeeping to use the same parameters as the previously used AdamW optimizer. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_8.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_8.txt new file mode 100644 index 0000000000000000000000000000000000000000..98143e8a36c8c8d9dbe5fd8f4c3b1d3d1cdda9ee --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_8.txt @@ -0,0 +1,27 @@ +import bitsandbytes as bnb +from torch import nn +from transformers.trainer_pt_utils import get_parameter_names +training_args = TrainingArguments(per_device_train_batch_size=4, **default_args) +decay_parameters = get_parameter_names(model, [nn.LayerNorm]) +decay_parameters = [name for name in decay_parameters if "bias" not in name] +optimizer_grouped_parameters = [ + { + "params": [p for n, p in model.named_parameters() if n in decay_parameters], + "weight_decay": training_args.weight_decay, + }, + { + "params": [p for n, p in model.named_parameters() if n not in decay_parameters], + "weight_decay": 0.0, + }, +] +optimizer_kwargs = { + "betas": (training_args.adam_beta1, training_args.adam_beta2), + "eps": training_args.adam_epsilon, +} +optimizer_kwargs["lr"] = training_args.learning_rate +adam_bnb_optim = bnb.optim.Adam8bit( + optimizer_grouped_parameters, + betas=(training_args.adam_beta1, training_args.adam_beta2), + eps=training_args.adam_epsilon, + lr=training_args.learning_rate, +) \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_9.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_9.txt new file mode 100644 index 0000000000000000000000000000000000000000..850a2cb646becbb9fcd91a9d57b81517cd167b42 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_gpu_one.txt_chunk_9.txt @@ -0,0 +1,12 @@ +Finally, pass the custom optimizer as an argument to the Trainer: +py +trainer = Trainer(model=model, args=training_args, 
train_dataset=ds, optimizers=(adam_bnb_optim, None)) +Combined with other approaches (gradient accumulation, gradient checkpointing, and mixed precision training), +you can expect to get about a 3x memory improvement and even slightly higher throughput as using Adafactor. +multi_tensor +pytorch-nightly introduced torch.optim._multi_tensor which should significantly speed up the optimizers for situations +with lots of small feature tensors. It should eventually become the default, but if you want to experiment with it sooner, take a look at this GitHub issue. +Data preloading +One of the important requirements to reach great training speed is the ability to feed the GPU at the maximum speed it +can handle. By default, everything happens in the main process, and it might not be able to read the data from disk fast +enough, and thus create a bottleneck, leading to GPU under-utilization. Configure the following arguments to reduce the bottleneck: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_special.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_special.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..78b1b75fe3f9b3fe3fcfa08b84987f82817736cb --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_special.txt_chunk_0.txt @@ -0,0 +1,8 @@ +PyTorch training on Apple silicon +Previously, training models on a Mac was limited to the CPU only. With the release of PyTorch v1.12, you can take advantage of training models with Apple's silicon GPUs for significantly faster performance and training. This is powered in PyTorch by integrating Apple's Metal Performance Shaders (MPS) as a backend. The MPS backend implements PyTorch operations as custom Metal shaders and places these modules on a mps device. + +Some PyTorch operations are not implemented in MPS yet and will throw an error. To avoid this, you should set the environment variable PYTORCH_ENABLE_MPS_FALLBACK=1 to use the CPU kernels instead (you'll still see a UserWarning). + +If you run into any other errors, please open an issue in the PyTorch repository because the [Trainer] only integrates the MPS backend. + +With the mps device set, you can: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_special.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_special.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..8faadb6c8b0044430f6d6762b12d57df9697b3d8 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_special.txt_chunk_1.txt @@ -0,0 +1,10 @@ +With the mps device set, you can: + +train larger networks or batch sizes locally +reduce data retrieval latency because the GPU's unified memory architecture allows direct access to the full memory store +reduce costs because you don't need to train on cloud-based GPUs or add additional local GPUs + +Get started by making sure you have PyTorch installed. MPS acceleration is supported on macOS 12.3+. + +pip install torch torchvision torchaudio +[TrainingArguments] uses the mps device by default if it's available which means you don't need to explicitly set the device. For example, you can run the run_glue.py script with the MPS backend automatically enabled without making any changes. 
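To quickly confirm that the MPS backend is available before launching a longer run, a small check like this (plain PyTorch) can help:

```python
import torch

if torch.backends.mps.is_available():
    device = torch.device("mps")
    x = torch.randn(8, 128, device=device)
    print("MPS is available:", x.device)
else:
    # Either macOS < 12.3 or PyTorch was built without MPS support
    print("MPS backend not available, falling back to CPU")
```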
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_special.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_special.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..1264eb2ec63d2ac9d0eed45625ff4950bf041b45 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_special.txt_chunk_2.txt @@ -0,0 +1,16 @@ +export TASK_NAME=mrpc +python examples/pytorch/text-classification/run_glue.py \ + --model_name_or_path google-bert/bert-base-cased \ + --task_name $TASK_NAME \ +- --use_mps_device \ + --do_train \ + --do_eval \ + --max_seq_length 128 \ + --per_device_train_batch_size 32 \ + --learning_rate 2e-5 \ + --num_train_epochs 3 \ + --output_dir /tmp/$TASK_NAME/ \ + --overwrite_output_dir + +Backends for distributed setups like gloo and nccl are not supported by the mps device which means you can only train on a single GPU with the MPS backend. +You can learn more about the MPS backend in the Introducing Accelerated PyTorch Training on Mac blog post. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..1e07187c9d8989e7ca5774f47540611deeab88d9 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_0.txt @@ -0,0 +1,3 @@ +Training on TPU with TensorFlow + +If you don't need long explanations and just want TPU code samples to get started with, check out our TPU example notebook! \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..6cb2387aa4ccd602fdd0e2da0c3979c6d645f432 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_1.txt @@ -0,0 +1,7 @@ +What is a TPU? +A TPU is a Tensor Processing Unit. They are hardware designed by Google, which are used to greatly speed up the tensor computations within neural networks, much like GPUs. They can be used for both network training and inference. They are generally accessed through Google’s cloud services, but small TPUs can also be accessed directly for free through Google Colab and Kaggle Kernels. +Because all TensorFlow models in 🤗 Transformers are Keras models, most of the methods in this document are generally applicable to TPU training for any Keras model! However, there are a few points that are specific to the HuggingFace ecosystem (hug-o-system?) of Transformers and Datasets, and we’ll make sure to flag them up when we get to them. +What kinds of TPU are available? +New users are often very confused by the range of TPUs, and the different ways to access them. The first key distinction to understand is the difference between TPU Nodes and TPU VMs. +When you use a TPU Node, you are effectively indirectly accessing a remote TPU. You will need a separate VM, which will initialize your network and data pipeline and then forward them to the remote node. When you use a TPU on Google Colab, you are accessing it in the TPU Node style. +Using TPU Nodes can have some quite unexpected behaviour for people who aren’t used to them! 
In particular, because the TPU is located on a physically different system to the machine you’re running your Python code on, your data cannot be local to your machine - any data pipeline that loads from your machine’s internal storage will totally fail! Instead, data must be stored in Google Cloud Storage where your data pipeline can still access it, even when the pipeline is running on the remote TPU node. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..b24447026a323fe842384339a1b4f9c507df782d --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_2.txt @@ -0,0 +1,3 @@ +If you can fit all your data in memory as np.ndarray or tf.Tensor, then you can fit() on that data even when using Colab or a TPU Node, without needing to upload it to Google Cloud Storage. + +🤗Specific Hugging Face Tip🤗: The methods Dataset.to_tf_dataset() and its higher-level wrapper model.prepare_tf_dataset(), which you will see throughout our TF code examples, will both fail on a TPU Node. The reason for this is that even though they create a tf.data.Dataset, it is not a “pure” tf.data pipeline and uses tf.numpy_function or Dataset.from_generator() to stream data from the underlying HuggingFace Dataset. This HuggingFace Dataset is backed by data on a local disk, which the remote TPU Node will not be able to read. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_3.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..3a724dbf6970d241b6aa305e367940ad3866b188 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_3.txt @@ -0,0 +1,8 @@ +The second way to access a TPU is via a TPU VM. When using a TPU VM, you connect directly to the machine that the TPU is attached to, much like training on a GPU VM. TPU VMs are generally easier to work with, particularly when it comes to your data pipeline. None of the above warnings apply to TPU VMs! +This is an opinionated document, so here’s our opinion: Avoid using TPU Node if possible. It is more confusing and more difficult to debug than TPU VMs. It is also likely to be unsupported in future - Google’s latest TPU, TPUv4, can only be accessed as a TPU VM, which suggests that TPU Nodes are increasingly going to become a “legacy” access method. However, we understand that the only free TPU access is on Colab and Kaggle Kernels, which use TPU Nodes - so we’ll try to explain how to handle it if you have to! Check the TPU example notebook for code samples that explain this in more detail. +What sizes of TPU are available? +A single TPU (a v2-8/v3-8/v4-8) runs 8 replicas. TPUs exist in pods that can run hundreds or thousands of replicas simultaneously. When you use more than a single TPU but less than a whole pod (for example, a v3-32), your TPU fleet is referred to as a pod slice. +When you access a free TPU via Colab, you generally get a single v2-8 TPU. +I keep hearing about this XLA thing. What’s XLA, and how does it relate to TPUs? +XLA is an optimizing compiler, used by both TensorFlow and JAX. In JAX it is the only compiler, whereas in TensorFlow it is optional (but mandatory on TPU!).
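As the next sentence explains, turning XLA on from Keras is a single argument to model.compile(); here is a minimal, hypothetical sketch (the toy model, optimizer, and loss are illustrative assumptions, not taken from this guide):
python
import tensorflow as tf

# Any Keras model can be compiled this way; the tiny Dense model is only a placeholder.
model = tf.keras.Sequential([tf.keras.layers.Dense(2)])
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    jit_compile=True,  # ask Keras to compile the train/eval/predict steps with XLA
)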
The easiest way to enable it when training a Keras model is to pass the argument jit_compile=True to model.compile(). If you don’t get any errors and performance is good, that’s a great sign that you’re ready to move to TPU! +Debugging on TPU is generally a bit harder than on CPU/GPU, so we recommend getting your code running on CPU/GPU with XLA first before trying it on TPU. You don’t have to train for long, of course - just for a few steps to make sure that your model and data pipeline are working like you expect them to. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_4.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..bec0ca07a28c1ac5aa60d885ba44577708920ac0 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_4.txt @@ -0,0 +1,6 @@ +XLA-compiled code is usually faster - so even if you’re not planning to run on TPU, adding jit_compile=True can improve your performance. Be sure to note the caveats below about XLA compatibility, though! + +Tip born of painful experience: Although using jit_compile=True is a good way to get a speed boost and test if your CPU/GPU code is XLA-compatible, it can cause a lot of problems if you leave it in when actually training on TPU. XLA compilation will happen implicitly on TPU, so remember to remove that line before running your code on a TPU! + +How do I make my model XLA compatible? +In many cases, your code is probably XLA-compatible already! However, there are a few things that work in normal TensorFlow that don’t work in XLA. We’ve distilled them into three core rules below: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_5.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..5a8109cb5cd0efae9f74abba23e4e37808879fb1 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_5.txt @@ -0,0 +1 @@ +🤗Specific HuggingFace Tip🤗: We’ve put a lot of effort into rewriting our TensorFlow models and loss functions to be XLA-compatible. Our models and loss functions generally obey rules #1 and #2 by default, so you can skip over them if you’re using transformers models. Don’t forget about these rules when writing your own models and loss functions, though! \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_6.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..1a2999cfc12aa301b95dafea6eeed9fb93bc3f29 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_6.txt @@ -0,0 +1,30 @@ +XLA Rule #1: Your code cannot have “data-dependent conditionals” +What that means is that any if statement cannot depend on values inside a tf.Tensor. For example, this code block cannot be compiled with XLA! +python +if tf.reduce_sum(tensor) > 10: + tensor = tensor / 2.0 +This might seem very restrictive at first, but most neural net code doesn’t need to do this.
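As the next paragraph notes, one XLA-friendly way to keep this kind of branch is tf.cond, which traces both branches and selects between them inside the graph; a minimal sketch, assuming tensor is the same tf.Tensor as in the snippet above:
python
import tensorflow as tf

tensor = tf.random.uniform((4, 4)) * 10.0  # placeholder input, assumed for this sketch

# Both branches are traced, so the result has a fixed shape that XLA can compile.
tensor = tf.cond(
    tf.reduce_sum(tensor) > 10,
    lambda: tensor / 2.0,  # branch used when the sum exceeds 10
    lambda: tensor,        # identity branch otherwise
)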
You can often get around this restriction by using tf.cond (see the documentation here) or by removing the conditional and finding a clever math trick with indicator variables instead, like so: +python +sum_over_10 = tf.cast(tf.reduce_sum(tensor) > 10, tf.float32) +tensor = tensor / (1.0 + sum_over_10) +This code has exactly the same effect as the code above, but by avoiding a conditional, we ensure it will compile with XLA without problems! +XLA Rule #2: Your code cannot have “data-dependent shapes” +What this means is that the shape of all of the tf.Tensor objects in your code cannot depend on their values. For example, the function tf.unique cannot be compiled with XLA, because it returns a tensor containing one instance of each unique value in the input. The shape of this output will obviously be different depending on how repetitive the input Tensor was, and so XLA refuses to handle it! +In general, most neural network code obeys rule #2 by default. However, there are a few common cases where it becomes a problem. One very common one is when you use label masking, setting your labels to a negative value to indicate that those positions should be ignored when computing the loss. If you look at NumPy or PyTorch loss functions that support label masking, you will often see code like this that uses boolean indexing: +python +label_mask = labels >= 0 +masked_outputs = outputs[label_mask] +masked_labels = labels[label_mask] +loss = compute_loss(masked_outputs, masked_labels) +mean_loss = torch.mean(loss) +This code is totally fine in NumPy or PyTorch, but it breaks in XLA! Why? Because the shape of masked_outputs and masked_labels depends on how many positions are masked - that makes it a data-dependent shape. However, just like for rule #1, we can often rewrite this code to yield exactly the same output without any data-dependent shapes. +python +label_mask = tf.cast(labels >= 0, tf.float32) +loss = compute_loss(outputs, labels) +loss = loss * label_mask # Set negative label positions to 0 +mean_loss = tf.reduce_sum(loss) / tf.reduce_sum(label_mask) +Here, we avoid data-dependent shapes by computing the loss for every position, but zeroing out the masked positions in both the numerator and denominator when we calculate the mean, which yields exactly the same result as the first block while maintaining XLA compatibility. Note that we use the same trick as in rule #1 - converting a tf.bool to tf.float32 and using it as an indicator variable. This is a really useful trick, so remember it if you need to convert your own code to XLA! +XLA Rule #3: XLA will need to recompile your model for every different input shape it sees +This is the big one. What this means is that if your input shapes are very variable, XLA will have to recompile your model over and over, which will create huge performance problems. This commonly arises in NLP models, where input texts have variable lengths after tokenization. In other modalities, static shapes are more common and this rule is much less of a problem. +How can you get around rule #3? The key is padding - if you pad all your inputs to the same length, and then use an attention_mask, you can get the same results as you’d get from variable shapes, but without any XLA issues. However, excessive padding can cause severe slowdown too - if you pad all your samples to the maximum length in the whole dataset, you might end up with batches consisting of endless padding tokens, which will waste a lot of compute and memory! +There isn’t a perfect solution to this problem.
However, you can try some tricks. One very useful trick is to pad batches of samples up to a multiple of a number like 32 or 64 tokens. This often only increases the number of tokens by a small amount, but it hugely reduces the number of unique input shapes, because every input shape now has to be a multiple of 32 or 64. Fewer unique input shapes means fewer XLA compilations! \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_7.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_7.txt new file mode 100644 index 0000000000000000000000000000000000000000..abf165fb79376c7837470de11aae13b4a6876d35 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_7.txt @@ -0,0 +1,6 @@ +🤗Specific HuggingFace Tip🤗: Our tokenizers and data collators have methods that can help you here. You can use padding="max_length" or padding="longest" when calling tokenizers to get them to output padded data. Our tokenizers and data collators also have a pad_to_multiple_of argument that you can use to reduce the number of unique input shapes you see! + +How do I actually train my model on TPU? +Once your training is XLA-compatible and (if you’re using TPU Node / Colab) your dataset has been prepared appropriately, running on TPU is surprisingly easy! All you really need to change in your code is to add a few lines to initialize your TPU, and to ensure that your model and dataset are created inside a TPUStrategy scope. Take a look at our TPU example notebook to see this in action! +Summary +There was a lot in here, so let’s summarize with a quick checklist you can follow when you want to get your model ready for TPU training: \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_8.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_8.txt new file mode 100644 index 0000000000000000000000000000000000000000..9689fe76f26d1388c15e7a9cc0d6b1ae2b1ba505 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perf_train_tpu_tf.txt_chunk_8.txt @@ -0,0 +1,10 @@ +Make sure your code follows the three rules of XLA +Compile your model with jit_compile=True on CPU/GPU and confirm that you can train it with XLA +Either load your dataset into memory or use a TPU-compatible dataset loading approach (see notebook) +Migrate your code either to Colab (with accelerator set to “TPU”) or a TPU VM on Google Cloud +Add TPU initializer code (see notebook) +Create your TPUStrategy and make sure dataset loading and model creation are inside the strategy.scope() (see notebook) +Don’t forget to take jit_compile=True out again when you move to TPU! +🙏🙏🙏🥺🥺🥺 +Call model.fit() +You did it! \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_performance.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_performance.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..c3a83a7b76a2b6fc4f7e31f0889d4fce4ade557c --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_performance.txt_chunk_0.txt @@ -0,0 +1,14 @@ +Performance and Scalability +Training large transformer models and deploying them to production present various challenges. +During training, the model may require more GPU memory than available or exhibit slow training speed. In the deployment +phase, the model can struggle to handle the required throughput in a production environment.
+This documentation aims to assist you in overcoming these challenges and finding the optimal setting for your use-case. +The guides are divided into training and inference sections, as each comes with different challenges and solutions. +Within each section you'll find separate guides for different hardware configurations, such as single GPU vs. multi-GPU +for training or CPU vs. GPU for inference. +Use this document as your starting point to navigate further to the methods that match your scenario. +Training +Training large transformer models efficiently requires an accelerator such as a GPU or TPU. The most common case is where +you have a single GPU. The methods that you can apply to improve training efficiency on a single GPU extend to other setups +such as multiple GPUs. However, there are also techniques that are specific to multi-GPU or CPU training. We cover them in +separate sections. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_performance.txt_chunk_1.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_performance.txt_chunk_1.txt new file mode 100644 index 0000000000000000000000000000000000000000..5dff2404ea0ccfa6fa14bb79c80ccad72faacf85 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_performance.txt_chunk_1.txt @@ -0,0 +1,11 @@ +Methods and tools for efficient training on a single GPU: start here to learn common approaches that can help optimize GPU memory utilization, speed up the training, or both. +Multi-GPU training section: explore this section to learn about further optimization methods that apply to multi-GPU settings, such as data, tensor, and pipeline parallelism. +CPU training section: learn about mixed precision training on CPU. +Efficient Training on Multiple CPUs: learn about distributed CPU training. +Training on TPU with TensorFlow: if you are new to TPUs, refer to this section for an opinionated introduction to training on TPUs and using XLA. +Custom hardware for training: find tips and tricks when building your own deep learning rig. +Hyperparameter Search using Trainer API + +Inference +Efficient inference with large models in a production environment can be as challenging as training them. In the following +sections we go through the steps to run inference on CPU and single/multi-GPU setups. \ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_performance.txt_chunk_2.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_performance.txt_chunk_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..9f5ca6c12a829b331d9aac244161163639c0928f --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_performance.txt_chunk_2.txt @@ -0,0 +1,16 @@ +Inference on a single CPU +Inference on a single GPU +Multi-GPU inference +XLA Integration for TensorFlow Models + +Training and inference +Here you'll find techniques, tips and tricks that apply whether you are training a model or running inference with it. + +Instantiating a big model +Troubleshooting performance issues + +Contribute +This document is far from complete and a lot more needs to be added, so if you have additions or corrections to +make, please don't hesitate to open a PR, or if you aren't sure, start an Issue and we can discuss the details there. +When making contributions claiming that A is better than B, please try to include a reproducible benchmark and/or a link to the +source of that information (unless it comes directly from you).
\ No newline at end of file diff --git a/BAAI_bge-large-en-v1.5/fixed_chunks/_perplexity.txt_chunk_0.txt b/BAAI_bge-large-en-v1.5/fixed_chunks/_perplexity.txt_chunk_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..404ea13313e123384d90c48bfa8157d2ef0ab5d3 --- /dev/null +++ b/BAAI_bge-large-en-v1.5/fixed_chunks/_perplexity.txt_chunk_0.txt @@ -0,0 +1,15 @@ +Perplexity of fixed-length models +[[open-in-colab]] +Perplexity (PPL) is one of the most common metrics for evaluating language models. Before diving in, we should note +that the metric applies specifically to classical language models (sometimes called autoregressive or causal language +models) and is not well defined for masked language models like BERT (see summary of the models). +Perplexity is defined as the exponentiated average negative log-likelihood of a sequence. If we have a tokenized +sequence \(X = (x_0, x_1, \dots, x_t)\), then the perplexity of \(X\) is, +$$\text{PPL}(X) = \exp \left\{ {-\frac{1}{t}\sum_i^t \log p_\theta (x_i|x_{